Date: (Mon) Feb 08, 2016
Data: Source: Training: https://www.kaggle.com/c/yelp-restaurant-photo-classification/download/train.csv.tgz
New: https://www.kaggle.com/c/yelp-restaurant-photo-classification/download/test.csv.tgz
Time period:
Based on analysis utilizing <> techniques,
Summary of key steps & error improvement stats:
Use plot.ly for interactive plots ?
varImp for randomForest crashes in caret version:6.0.41 -> submit bug report
extensions toward multiclass classification are scheduled for the next release
# ---- Session setup --------------------------------------------------------
# NOTE(review): rm(list = ls()) wipes the caller's workspace; acceptable for a
# standalone analysis script, but hostile if this file is ever source()d from
# another session.
rm(list = ls())
set.seed(12345)
options(stringsAsFactors = FALSE)
# Project helper scripts (machine-specific Dropbox paths - confirm locally)
source("~/Dropbox/datascience/R/myscript.R")
source("~/Dropbox/datascience/R/mydsutils.R")
## Loading required package: caret
## Loading required package: lattice
## Loading required package: ggplot2
source("~/Dropbox/datascience/R/myplot.R")
source("~/Dropbox/datascience/R/mypetrinet.R")
source("~/Dropbox/datascience/R/myplclust.R")
source("~/Dropbox/datascience/R/mytm.R")
# Gather all package requirements here
suppressPackageStartupMessages(require(doMC))
glbCores <- 6 # of cores on machine - 2
# Register the parallel backend before any caret::train() calls can use it
registerDoMC(glbCores)
suppressPackageStartupMessages(require(caret))
# plyr is attached before dplyr deliberately so that dplyr's verbs mask
# plyr's (see the masking messages below), not the reverse
require(plyr)
## Loading required package: plyr
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(knitr)
## Loading required package: knitr
require(stringr)
## Loading required package: stringr
#source("dbgcaret.R")
#packageVersion("snow")
#require(sos); findFn("cosine", maxPages=2, sortby="MaxScore")
# Analysis control global variables
# Inputs
# url/name = "<pointer>"; if url specifies a zip file, name = "<filename>"
# sep = choose from c(NULL, "\t")
# NOTE(review): the Kaggle URLs require authentication; presumably the *_color
# csv names are the locally extracted files the pipeline actually reads.
glbObsTrnFile <- list(url = "https://www.kaggle.com/c/yelp-restaurant-photo-classification/download/train.csv.tgz",
name = "train_color.csv")
glbObsNewFile <- list(url = "https://www.kaggle.com/c/yelp-restaurant-photo-classification/download/test.csv.tgz",
name = "test_color.csv") # default OR
#list(splitSpecs = list(method = NULL #select from c(NULL, "condition", "sample", "copy")
# ,nRatio = 0.3 # > 0 && < 1 if method == "sample"
# ,seed = 123 # any integer or glbObsTrnPartitionSeed if method == "sample"
# ,condition = # or 'is.na(<var>)'; '<var> <condition_operator> <value>'
# )
# )
glbInpMerge <- NULL #: default
# list(fnames = c("<fname1>", "<fname2>")) # files will be concatenated
glb_is_separate_newobs_dataset <- TRUE # select from c(FALSE, TRUE)
glb_split_entity_newobs_datasets <- TRUE # FALSE not supported - use "copy" for glbObsNewFile$splitSpecs$method # select from c(FALSE, TRUE)
glbObsDropCondition <- NULL # : default
# enclose in single-quotes b/c condition might include double quotes
# use | & ; NOT || && (the condition is evaluated against whole columns)
# '<condition>'
# 'grepl("^First Draft Video:", glbObsAll$Headline)'
# '(is.na(glbObsAll[, glb_rsp_var_raw]) & grepl("Train", glbObsAll[, glbFeatsId]))'
#nrow(do.call("subset",list(glbObsAll, parse(text=paste0("!(", glbObsDropCondition, ")")))))
glb_obs_repartition_train_condition <- NULL # : default
# "<condition>"
glb_max_fitobs <- NULL # or any integer
glbObsTrnPartitionSeed <- 123 # or any integer
glb_is_regression <- FALSE; glb_is_classification <- !glb_is_regression;
glb_is_binomial <- TRUE # select from c(TRUE, FALSE)
# Raw response: the "outdoor" label column; the modeled response is its
# factor-mapped counterpart (see glb_map_rsp_raw_to_var below)
glb_rsp_var_raw <- "outdoor"
# for classification, the response variable has to be a factor
glb_rsp_var <- "outdoor.fctr"
# if the response factor is based on numbers/logicals e.g (0/1 OR TRUE/FALSE vs. "A"/"B"),
# or contains spaces (e.g. "Not in Labor Force")
# caret predict(..., type="prob") crashes
glb_map_rsp_raw_to_var <- #NULL
    function(raw) {
        # Map the raw "outdoor" label to the binary response factor:
        # -1 -> "N", any other non-NA value -> "Y"; NA propagates as NA.
        # "N" is forced to be the reference level (required by caret's
        # class-probability machinery).
        mapped <- rep_len(NA_character_, length(raw))
        known <- !is.na(raw)
        mapped[known] <- ifelse(raw[known] == -1, "N", "Y")
        relevel(as.factor(mapped), ref = "N")
    }
#if glb_rsp_var_raw is numeric:
#print(summary(glbObsAll[, glb_rsp_var_raw]))
#glb_map_rsp_raw_to_var(tst <- c(NA, as.numeric(summary(glbObsAll[, glb_rsp_var_raw]))))
#if glb_rsp_var_raw is character:
#print(table(glbObsAll[, glb_rsp_var_raw], useNA = "ifany"))
#print(table(glb_map_rsp_raw_to_var(tst <- glbObsAll[, glb_rsp_var_raw]), useNA = "ifany"))
glb_map_rsp_var_to_raw <- #NULL
    function(var) {
        # Inverse of glb_map_rsp_raw_to_var: recover the level labels
        # ("N"/"Y") from the response factor. NA entries stay NA.
        lvls <- levels(var)
        lvls[as.numeric(var)]
    }
#print(table(glb_map_rsp_var_to_raw(glb_map_rsp_raw_to_var(tst)), useNA = "ifany"))
# Sanity check: a transformed response name requires a raw -> factor mapper
if ((glb_rsp_var != glb_rsp_var_raw) && is.null(glb_map_rsp_raw_to_var)) {
    stop("glb_map_rsp_raw_to_var function expected")
}
# List info gathered for various columns
# <col_name>: <description>; <notes>
# currently does not handle more than 1 column; consider concatenating multiple columns
# If glbFeatsId == NULL, ".rownames <- as.numeric(row.names())" is the default
glbFeatsId <- "business_id" # choose from c(NULL : default, "<id_feat>")
glbFeatsCategory <- "lumG.mad.mean.cut.fctr"
#"nImgs.cut.fctr" # choose from c(NULL : default, "<category_feat>")
# User-specified exclusions
glbFeatsExclude <- c(NULL
# Feats that shd be excluded due to known causation by prediction variable
# , "<feat1", "<feat2>"
,"labels"
,"lunch","dinner","reserve","outdoor","expensive","liquor","table","classy","kids"
# Feats that are linear combinations (alias in glm)
# Feature-engineering phase -> start by excluding all features except id & category & work each one in
,"business_id"
# Raw string-encoded per-image stat lists; the derived numeric summaries
# below are used instead. (fix: "imgResYLst" was listed twice originally)
,"imgResXLst","imgResYLst","imgResXYLst"
,"imgLumR.meanLst","imgLumR.madLst","imgLumB.meanLst","imgLumB.madLst","imgLumG.meanLst","imgLumG.madLst","imgCorRBLst","imgCorBGLst","imgCorGRLst","imgCosSmlRBLst","imgCosSmlBGLst","imgCosSmlGRLst"
)
# The raw response column must never leak into the predictor set
if (glb_rsp_var_raw != glb_rsp_var)
glbFeatsExclude <- union(glbFeatsExclude, glb_rsp_var_raw)
# Features allowed only as interaction terms, keyed child -> parent
glbFeatsInteractionOnly <- list()
#glbFeatsInteractionOnly[["<child_feat>"]] <- "<parent_feat>"
glbFeatsDrop <- c(NULL
# , "<feat1>", "<feat2>"
)
glb_map_vars <- NULL # or c("<var1>", "<var2>")
glb_map_urls <- list();
# glb_map_urls[["<var1>"]] <- "<var1.url>"
glb_assign_pairs_lst <- NULL;
# glb_assign_pairs_lst[["<var1>"]] <- list(from=c(NA),
# to=c("NA.my"))
# names(NULL) is NULL, so this is empty when no pairs are configured
glb_assign_vars <- names(glb_assign_pairs_lst)
# Derived features; Use this mechanism to cleanse data ??? Cons: Data duplication ???
# Each entry: mapfn computes the new column; args names the source columns.
# The framework builds the call with NAMED arguments, so each mapfn's
# parameter names must match its args entries exactly.
glbFeatsDerive <- list();
# glbFeatsDerive[["<feat.my.sfx>"]] <- list(
# mapfn = function(<arg1>, <arg2>) { return(function(<arg1>, <arg2>)) }
# , args = c("<arg1>", "<arg2>"))
#myprint_df(data.frame(ImageId = mapfn(glbObsAll$.src, glbObsAll$.pos)))
#data.frame(ImageId = mapfn(glbObsAll$.src, glbObsAll$.pos))[7045:7055, ]
# character
# mapfn = function(Week) { return(substr(Week, 1, 10)) }
# mapfn = function(Name) { return(sapply(Name, function(thsName)
# str_sub(unlist(str_split(thsName, ","))[1], 1, 1))) }
# mapfn = function(descriptor) { return(plyr::revalue(descriptor, c(
# "ABANDONED BUILDING" = "OTHER",
# "**" = "**"
# ))) }
# mapfn = function(description) { mod_raw <- description;
# This is here because it does not work if it's in txt_map_filename
# mod_raw <- gsub(paste0(c("\n", "\211", "\235", "\317", "\333"), collapse = "|"), " ", mod_raw)
# Don't parse for "." because of ".com"; use customized gsub for that text
# mod_raw <- gsub("(\\w)(!|\\*|,|-|/)(\\w)", "\\1\\2 \\3", mod_raw);
# Some state acrnoyms need context for separation e.g.
# LA/L.A. could either be "Louisiana" or "LosAngeles"
# modRaw <- gsub("\\bL\\.A\\.( |,|')", "LosAngeles\\1", modRaw);
# OK/O.K. could either be "Oklahoma" or "Okay"
# modRaw <- gsub("\\bACA OK\\b", "ACA OKay", modRaw);
# modRaw <- gsub("\\bNow O\\.K\\.\\b", "Now OKay", modRaw);
# PR/P.R. could either be "PuertoRico" or "Public Relations"
# modRaw <- gsub("\\bP\\.R\\. Campaign", "PublicRelations Campaign", modRaw);
# VA/V.A. could either be "Virginia" or "VeteransAdministration"
# modRaw <- gsub("\\bthe V\\.A\\.\\:", "the VeteranAffairs:", modRaw);
#
# Custom mods
# return(mod_raw) }
# numeric
# Create feature based on record position/id in data
glbFeatsDerive[[".pos"]] <- list(
# seq_along (not 1:length) so zero-length input yields integer(0), not c(1, 0)
mapfn = function(.rnorm) { return(seq_along(.rnorm)) }
, args = c(".rnorm"))
# glbFeatsDerive[[".pos.y"]] <- list(
# mapfn = function(.rnorm) { return(1:length(.rnorm)) }
# , args = c(".rnorm"))
# Right-skew corrections of the per-business image count: log1p, sqrt,
# negative exponential. Multiple transforms of the same source are kept so
# feature selection can pick the most normal-looking one.
glbFeatsDerive[["nImgs.log1p"]] <- list(
mapfn = function(nImgs) { return(log1p(nImgs)) }
, args = c("nImgs"))
glbFeatsDerive[["nImgs.root2"]] <- list(
mapfn = function(nImgs) { return(nImgs ^ (1/2)) }
, args = c("nImgs"))
glbFeatsDerive[["nImgs.nexp"]] <- list(
mapfn = function(nImgs) { return(exp(-nImgs)) }
, args = c("nImgs"))
# imgResXLst is a comma-separated string of per-image X resolutions per
# business; summarize each observation to mean/mad, then transform.
# NOTE(review): no na.rm here (unlike the Cor*/CosSml* derives below) -
# presumably the resolution lists never contain NA; confirm against the data.
glbFeatsDerive[["resX.mean"]] <- list(
mapfn = function(imgResXLst) { return(sapply(imgResXLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgResXLst"))
glbFeatsDerive[["resX.mad"]] <- list(
mapfn = function(imgResXLst) { return(sapply(imgResXLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgResXLst"))
glbFeatsDerive[["resX.mean.log1p"]] <- list(
mapfn = function(resX.mean) { return(log1p(resX.mean)) }
, args = c("resX.mean"))
glbFeatsDerive[["resX.mean.root2"]] <- list(
mapfn = function(resX.mean) { return(resX.mean ^ (1/2)) }
, args = c("resX.mean"))
glbFeatsDerive[["resX.mean.nexp"]] <- list(
mapfn = function(resX.mean) { return(exp(-resX.mean)) }
, args = c("resX.mean"))
glbFeatsDerive[["resX.mad.log1p"]] <- list(
mapfn = function(resX.mad) { return(log1p(resX.mad)) }
, args = c("resX.mad"))
glbFeatsDerive[["resX.mad.root2"]] <- list(
mapfn = function(resX.mad) { return(resX.mad ^ (1/2)) }
, args = c("resX.mad"))
glbFeatsDerive[["resX.mad.nexp"]] <- list(
mapfn = function(resX.mad) { return(exp(-resX.mad)) }
, args = c("resX.mad"))
# Same summary + transform ladder for the Y resolutions
glbFeatsDerive[["resY.mean"]] <- list(
mapfn = function(imgResYLst) { return(sapply(imgResYLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgResYLst"))
glbFeatsDerive[["resY.mad"]] <- list(
mapfn = function(imgResYLst) { return(sapply(imgResYLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgResYLst"))
glbFeatsDerive[["resY.mean.log1p"]] <- list(
mapfn = function(resY.mean) { return(log1p(resY.mean)) }
, args = c("resY.mean"))
glbFeatsDerive[["resY.mean.root2"]] <- list(
mapfn = function(resY.mean) { return(resY.mean ^ (1/2)) }
, args = c("resY.mean"))
glbFeatsDerive[["resY.mean.nexp"]] <- list(
mapfn = function(resY.mean) { return(exp(-resY.mean)) }
, args = c("resY.mean"))
glbFeatsDerive[["resY.mad.log1p"]] <- list(
mapfn = function(resY.mad) { return(log1p(resY.mad)) }
, args = c("resY.mad"))
glbFeatsDerive[["resY.mad.root2"]] <- list(
mapfn = function(resY.mad) { return(resY.mad ^ (1/2)) }
, args = c("resY.mad"))
glbFeatsDerive[["resY.mad.nexp"]] <- list(
mapfn = function(resY.mad) { return(exp(-resY.mad)) }
, args = c("resY.mad"))
# Per-observation summaries of per-image pixel counts (resX * resY): the
# comma-separated X and Y resolution strings are parsed pairwise per business.
# mapply replaces the original grow-by-c() loop: no O(n^2) appends, and a
# zero-length input no longer trips the 1:length() c(1, 0) bug.
glbFeatsDerive[["resXY.mean"]] <- list(
mapfn = function(imgResXLst, imgResYLst) {
unname(mapply(function(xCsv, yCsv) {
resX <- as.numeric(unlist(str_split(xCsv, ",")))
resY <- as.numeric(unlist(str_split(yCsv, ",")))
mean(resX * resY)
}, imgResXLst, imgResYLst))
}
, args = c("imgResXLst","imgResYLst"))
glbFeatsDerive[["resXY.mad"]] <- list(
mapfn = function(imgResXLst, imgResYLst) {
unname(mapply(function(xCsv, yCsv) {
resX <- as.numeric(unlist(str_split(xCsv, ",")))
resY <- as.numeric(unlist(str_split(yCsv, ",")))
mad(resX * resY)
}, imgResXLst, imgResYLst))
}
, args = c("imgResXLst","imgResYLst"))
# Skew-correction ladder for the resXY summaries (same pattern as resX/resY)
glbFeatsDerive[["resXY.mean.log1p"]] <- list(
mapfn = function(resXY.mean) { return(log1p(resXY.mean)) }
, args = c("resXY.mean"))
glbFeatsDerive[["resXY.mean.root2"]] <- list(
mapfn = function(resXY.mean) { return(resXY.mean ^ (1/2)) }
, args = c("resXY.mean"))
glbFeatsDerive[["resXY.mean.nexp"]] <- list(
mapfn = function(resXY.mean) { return(exp(-resXY.mean)) }
, args = c("resXY.mean"))
glbFeatsDerive[["resXY.mad.log1p"]] <- list(
mapfn = function(resXY.mad) { return(log1p(resXY.mad)) }
, args = c("resXY.mad"))
glbFeatsDerive[["resXY.mad.root2"]] <- list(
mapfn = function(resXY.mad) { return(resXY.mad ^ (1/2)) }
, args = c("resXY.mad"))
glbFeatsDerive[["resXY.mad.nexp"]] <- list(
mapfn = function(resXY.mad) { return(exp(-resXY.mad)) }
, args = c("resXY.mad"))
# Per-channel luminance summaries: each img*Lst column is a comma-separated
# string of per-image statistics; summarize to mean/mad per business.
# NOTE(review): no na.rm here, unlike the Cor*/CosSml* derives below -
# presumably luminance stats are never NA; confirm against the data.
glbFeatsDerive[["lumR.mean.mean"]] <- list(
mapfn = function(imgLumR.meanLst) { return(sapply(imgLumR.meanLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumR.meanLst"))
glbFeatsDerive[["lumR.mean.mad"]] <- list(
mapfn = function(imgLumR.meanLst) { return(sapply(imgLumR.meanLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumR.meanLst"))
glbFeatsDerive[["lumR.mad.mean"]] <- list(
mapfn = function(imgLumR.madLst) { return(sapply(imgLumR.madLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumR.madLst"))
glbFeatsDerive[["lumR.mad.mad"]] <- list(
mapfn = function(imgLumR.madLst) { return(sapply(imgLumR.madLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumR.madLst"))
glbFeatsDerive[["lumB.mean.mean"]] <- list(
mapfn = function(imgLumB.meanLst) { return(sapply(imgLumB.meanLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumB.meanLst"))
glbFeatsDerive[["lumB.mean.mad"]] <- list(
mapfn = function(imgLumB.meanLst) { return(sapply(imgLumB.meanLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumB.meanLst"))
glbFeatsDerive[["lumB.mad.mean"]] <- list(
mapfn = function(imgLumB.madLst) { return(sapply(imgLumB.madLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumB.madLst"))
glbFeatsDerive[["lumB.mad.mad"]] <- list(
mapfn = function(imgLumB.madLst) { return(sapply(imgLumB.madLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumB.madLst"))
glbFeatsDerive[["lumG.mean.mean"]] <- list(
mapfn = function(imgLumG.meanLst) { return(sapply(imgLumG.meanLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumG.meanLst"))
glbFeatsDerive[["lumG.mean.mad"]] <- list(
mapfn = function(imgLumG.meanLst) { return(sapply(imgLumG.meanLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumG.meanLst"))
# lumG.mad.mean feeds glbFeatsCategory via lumG.mad.mean.cut.fctr below
glbFeatsDerive[["lumG.mad.mean"]] <- list(
mapfn = function(imgLumG.madLst) { return(sapply(imgLumG.madLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumG.madLst"))
glbFeatsDerive[["lumG.mad.mad"]] <- list(
mapfn = function(imgLumG.madLst) { return(sapply(imgLumG.madLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ",")))))) }
, args = c("imgLumG.madLst"))
# Cross-channel correlation / cosine-similarity summaries. na.rm = TRUE here:
# some per-image entries are NA (e.g. a correlation can be undefined -
# presumably for flat/uniform channels; confirm against the data).
glbFeatsDerive[["CorRB.mean"]] <- list(
mapfn = function(imgCorRBLst) { return(sapply(imgCorRBLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCorRBLst"))
glbFeatsDerive[["CorRB.mad"]] <- list(
mapfn = function(imgCorRBLst) { return(sapply(imgCorRBLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCorRBLst"))
glbFeatsDerive[["CorBG.mean"]] <- list(
mapfn = function(imgCorBGLst) { return(sapply(imgCorBGLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCorBGLst"))
glbFeatsDerive[["CorBG.mad"]] <- list(
mapfn = function(imgCorBGLst) { return(sapply(imgCorBGLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCorBGLst"))
glbFeatsDerive[["CorGR.mean"]] <- list(
mapfn = function(imgCorGRLst) { return(sapply(imgCorGRLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCorGRLst"))
glbFeatsDerive[["CorGR.mad"]] <- list(
mapfn = function(imgCorGRLst) { return(sapply(imgCorGRLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCorGRLst"))
glbFeatsDerive[["CosSmlRB.mean"]] <- list(
mapfn = function(imgCosSmlRBLst) { return(sapply(imgCosSmlRBLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCosSmlRBLst"))
glbFeatsDerive[["CosSmlRB.mad"]] <- list(
mapfn = function(imgCosSmlRBLst) { return(sapply(imgCosSmlRBLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCosSmlRBLst"))
glbFeatsDerive[["CosSmlBG.mean"]] <- list(
mapfn = function(imgCosSmlBGLst) { return(sapply(imgCosSmlBGLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCosSmlBGLst"))
glbFeatsDerive[["CosSmlBG.mad"]] <- list(
mapfn = function(imgCosSmlBGLst) { return(sapply(imgCosSmlBGLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCosSmlBGLst"))
glbFeatsDerive[["CosSmlGR.mean"]] <- list(
mapfn = function(imgCosSmlGRLst) { return(sapply(imgCosSmlGRLst, function(thsObsFeat)
mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCosSmlGRLst"))
glbFeatsDerive[["CosSmlGR.mad"]] <- list(
mapfn = function(imgCosSmlGRLst) { return(sapply(imgCosSmlGRLst, function(thsObsFeat)
mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm = TRUE))) }
, args = c("imgCosSmlGRLst"))
# Binned version used as glbFeatsCategory. NOTE(review): cut() returns NA for
# values outside (0.07, 0.37] - confirm the observed range fits these breaks.
glbFeatsDerive[["lumG.mad.mean.cut.fctr"]] <- list(
mapfn = function(lumG.mad.mean) { return(cut(lumG.mad.mean,
breaks = c(0.07, 0.21, 0.22, 0.23, 0.37))) }
, args = c("lumG.mad.mean"))
# Add logs of numerics that are not distributed normally
# Derive & keep multiple transformations of the same feature, if normality is hard to achieve with just one transformation
# Right skew: logp1; sqrt; ^ 1/3; logp1(logp1); log10; exp(-<feat>/constant)
# glbFeatsDerive[["WordCount.log1p"]] <- list(
# mapfn = function(WordCount) { return(log1p(WordCount)) }
# , args = c("WordCount"))
# glbFeatsDerive[["WordCount.root2"]] <- list(
# mapfn = function(WordCount) { return(WordCount ^ (1/2)) }
# , args = c("WordCount"))
# glbFeatsDerive[["WordCount.nexp"]] <- list(
# mapfn = function(WordCount) { return(exp(-WordCount)) }
# , args = c("WordCount"))
#print(summary(glbObsAll$WordCount))
#print(summary(mapfn(glbObsAll$WordCount)))
# mapfn = function(HOSPI.COST) { return(cut(HOSPI.COST, 5, breaks = c(0, 100000, 200000, 300000, 900000), labels = NULL)) }
# mapfn = function(Rasmussen) { return(ifelse(sign(Rasmussen) >= 0, 1, 0)) }
# mapfn = function(startprice) { return(startprice ^ (1/2)) }
# mapfn = function(startprice) { return(log(startprice)) }
# mapfn = function(startprice) { return(exp(-startprice / 20)) }
# mapfn = function(startprice) { return(scale(log(startprice))) }
# mapfn = function(startprice) { return(sign(sprice.predict.diff) * (abs(sprice.predict.diff) ^ (1/10))) }
# factor
# Binary per-attribute factors parsed from the space-separated "labels" code
# string: attribute code k present -> level "k", absent -> "-1", NA -> NA.
# All nine are listed in glbFeatsExclude above: they are alternative response
# candidates, not predictors (this run models "outdoor", code 3).
glbFeatsDerive[["lunch"]] <- list(
mapfn = function(labels) { return(factor(
sapply(labels, function(obsLabel) {if (is.na(obsLabel)) return(NA);
ifelse(any(as.numeric(unlist(str_split(obsLabel, " "))) %in% c(0)), "0", "-1") })
, levels = c("-1", "0"))) }
, args = c("labels"))
glbFeatsDerive[["dinner"]] <- list(
mapfn = function(labels) { return(factor(
sapply(labels, function(obsLabel) {if (is.na(obsLabel)) return(NA);
ifelse(any(as.numeric(unlist(str_split(obsLabel, " "))) %in% c(1)), "1", "-1") })
, levels = c("-1", "1"))) }
, args = c("labels"))
glbFeatsDerive[["reserve"]] <- list(
mapfn = function(labels) { return(factor(
sapply(labels, function(obsLabel) {if (is.na(obsLabel)) return(NA);
ifelse(any(as.numeric(unlist(str_split(obsLabel, " "))) %in% c(2)), "2", "-1") })
, levels = c("-1", "2"))) }
, args = c("labels"))
# "outdoor" (code 3) is glb_rsp_var_raw; glb_map_rsp_raw_to_var maps it to
# the modeled factor outdoor.fctr ("-1" -> "N", "3" -> "Y")
glbFeatsDerive[["outdoor"]] <- list(
mapfn = function(labels) { return(factor(
sapply(labels, function(obsLabel) {if (is.na(obsLabel)) return(NA);
ifelse(any(as.numeric(unlist(str_split(obsLabel, " "))) %in% c(3)), "3", "-1") })
, levels = c("-1", "3"))) }
, args = c("labels"))
glbFeatsDerive[["expensive"]] <- list(
mapfn = function(labels) { return(factor(
sapply(labels, function(obsLabel) {if (is.na(obsLabel)) return(NA);
ifelse(any(as.numeric(unlist(str_split(obsLabel, " "))) %in% c(4)), "4", "-1") })
, levels = c("-1", "4"))) }
, args = c("labels"))
glbFeatsDerive[["liquor"]] <- list(
mapfn = function(labels) { return(factor(
sapply(labels, function(obsLabel) {if (is.na(obsLabel)) return(NA);
ifelse(any(as.numeric(unlist(str_split(obsLabel, " "))) %in% c(5)), "5", "-1") })
, levels = c("-1", "5"))) }
, args = c("labels"))
glbFeatsDerive[["table"]] <- list(
mapfn = function(labels) { return(factor(
sapply(labels, function(obsLabel) {if (is.na(obsLabel)) return(NA);
ifelse(any(as.numeric(unlist(str_split(obsLabel, " "))) %in% c(6)), "6", "-1") })
, levels = c("-1", "6"))) }
, args = c("labels"))
glbFeatsDerive[["classy"]] <- list(
mapfn = function(labels) { return(factor(
sapply(labels, function(obsLabel) {if (is.na(obsLabel)) return(NA);
ifelse(any(as.numeric(unlist(str_split(obsLabel, " "))) %in% c(7)), "7", "-1") })
, levels = c("-1", "7"))) }
, args = c("labels"))
glbFeatsDerive[["kids"]] <- list(
mapfn = function(labels) { return(factor(
sapply(labels, function(obsLabel) {if (is.na(obsLabel)) return(NA);
ifelse(any(as.numeric(unlist(str_split(obsLabel, " "))) %in% c(8)), "8", "-1") })
, levels = c("-1", "8"))) }
, args = c("labels"))
# Binned image count (alternative glbFeatsCategory candidate; see above).
# NOTE(review): cut() yields NA outside (0, 3000] - confirm the range.
glbFeatsDerive[["nImgs.cut.fctr"]] <- list(
mapfn = function(nImgs) { return(cut(nImgs, breaks = c(0, 32, 60, 120, 3000))) }
, args = c("nImgs"))
# mapfn = function(PropR) { return(as.factor(ifelse(PropR >= 0.5, "Y", "N"))) }
# mapfn = function(productline, description) { as.factor(gsub(" ", "", productline)) }
# mapfn = function(purpose) { return(relevel(as.factor(purpose), ref="all_other")) }
# mapfn = function(raw) { tfr_raw <- as.character(cut(raw, 5));
# tfr_raw[is.na(tfr_raw)] <- "NA.my";
# return(as.factor(tfr_raw)) }
# mapfn = function(startprice.log10) { return(cut(startprice.log10, 3)) }
# mapfn = function(startprice.log10) { return(cut(sprice.predict.diff, c(-1000, -100, -10, -1, 0, 1, 10, 100, 1000))) }
# , args = c("<arg1>"))
# multiple args
# mapfn = function(id, date) { return(paste(as.character(id), as.character(date), sep = "#")) }
# mapfn = function(PTS, oppPTS) { return(PTS - oppPTS) }
# mapfn = function(startprice.log10.predict, startprice) {
# return(spdiff <- (10 ^ startprice.log10.predict) - startprice) }
# mapfn = function(productline, description) { as.factor(
# paste(gsub(" ", "", productline), as.numeric(nchar(description) > 0), sep = "*")) }
# mapfn = function(.src, .pos) {
# return(paste(.src, sprintf("%04d",
# ifelse(.src == "Train", .pos, .pos - 7049)
# ), sep = "#")) }
# # If glbObsAll is not sorted in the desired manner
# mapfn=function(Week) { return(coredata(lag(zoo(orderBy(~Week, glbObsAll)$ILI), -2, na.pad=TRUE))) }
# mapfn=function(ILI) { return(coredata(lag(zoo(ILI), -2, na.pad=TRUE))) }
# mapfn=function(ILI.2.lag) { return(log(ILI.2.lag)) }
# glbFeatsDerive[["<var1>"]] <- glbFeatsDerive[["<var2>"]]
# Snapshot of all derived-feature names, in registration order
glb_derive_vars <- names(glbFeatsDerive)
# ---- Date/time, price, image & text feature configuration -----------------
glbFeatsDateTime <- list()
# glbFeatsDateTime[["<DateTimeFeat>"]] <-
# c(format = "%Y-%m-%d %H:%M:%S", timezone = "America/New_York", impute.na = TRUE,
# last.ctg = TRUE, poly.ctg = TRUE)
glbFeatsPrice <- NULL # or c("<price_var>")
glbFeatsImage <- list() #list(<imageFeat> = list(patchSize = 10)) # if patchSize not specified, no patch computation
glbFeatsText <- list()
# Side effect: forces the C locale for the whole session (string collation)
Sys.setlocale("LC_ALL", "C") # For english
## [1] "C/C/C/C/C/en_US.UTF-8"
#glbFeatsText[["<TextFeature>"]] <- list(NULL,
# ,names = myreplacePunctuation(str_to_lower(gsub(" ", "", c(NULL,
# <comma-separated-screened-names>
# ))))
# ,rareWords = myreplacePunctuation(str_to_lower(gsub(" ", "", c(NULL,
# <comma-separated-nonSCOWL-words>
# ))))
#)
# Text Processing Step: custom modifications not present in txt_munge -> use glbFeatsDerive
# Text Processing Step: universal modifications
glb_txt_munge_filenames_pfx <- "<projectId>_mytxt_"
# Text Processing Step: tolower
# Text Processing Step: myreplacePunctuation
# Text Processing Step: removeWords
glb_txt_stop_words <- list()
# Remember to use unstemmed words
# Stop-word setup only runs when text features are configured (none are in
# this image-only run, so the block below is effectively inert)
if (length(glbFeatsText) > 0) {
require(tm)
require(stringr)
glb_txt_stop_words[["<txt_var>"]] <- sort(myreplacePunctuation(str_to_lower(gsub(" ", "", c(NULL
# Remove any words from stopwords
# , setdiff(myreplacePunctuation(stopwords("english")), c("<keep_wrd1>", <keep_wrd2>"))
# Remove salutations
,"mr","mrs","dr","Rev"
# Remove misc
#,"th" # Happy [[:digit::]]+th birthday
# Remove terms present in Trn only or New only; search for "Partition post-stem"
# ,<comma-separated-terms>
# cor.y.train == NA
# ,unlist(strsplit(paste(c(NULL
# ,"<comma-separated-terms>"
# ), collapse=",")
# freq == 1; keep c("<comma-separated-terms-to-keep>")
# ,<comma-separated-terms>
# chisq.pval high (e.g. == 1); keep c("<comma-separated-terms-to-keep>")
# ,<comma-separated-terms>
# nzv.freqRatio high (e.g. >= glbFeatsNzvFreqMax); keep c("<comma-separated-terms-to-keep>")
# ,<comma-separated-terms>
)))))
}
#orderBy(~term, glb_post_stem_words_terms_df_lst[[txtFeat]][grep("^man", glb_post_stem_words_terms_df_lst[[txtFeat]]$term), ])
#glbObsAll[glb_post_stem_words_terms_mtrx_lst[[txtFeat]][, 4866] > 0, c(glb_rsp_var, txtFeat)]
# To identify terms with a specific freq
#paste0(sort(subset(glb_post_stop_words_terms_df_lst[[txtFeat]], freq == 1)$term), collapse = ",")
#paste0(sort(subset(glb_post_stem_words_terms_df_lst[[txtFeat]], freq <= 2)$term), collapse = ",")
#subset(glb_post_stem_words_terms_df_lst[[txtFeat]], term %in% c("zinger"))
# To identify terms with a specific freq &
# are not stemmed together later OR is value of color.fctr (e.g. gold)
#paste0(sort(subset(glb_post_stop_words_terms_df_lst[[txtFeat]], (freq == 1) & !(term %in% c("blacked","blemish","blocked","blocks","buying","cables","careful","carefully","changed","changing","chargers","cleanly","cleared","connect","connects","connected","contains","cosmetics","default","defaulting","defective","definitely","describe","described","devices","displays","drop","drops","engravement","excellant","excellently","feels","fix","flawlessly","frame","framing","gentle","gold","guarantee","guarantees","handled","handling","having","install","iphone","iphones","keeped","keeps","known","lights","line","lining","liquid","liquidation","looking","lots","manuals","manufacture","minis","most","mostly","network","networks","noted","opening","operated","performance","performs","person","personalized","photograph","physically","placed","places","powering","pre","previously","products","protection","purchasing","returned","rotate","rotation","running","sales","second","seconds","shipped","shuts","sides","skin","skinned","sticker","storing","thats","theres","touching","unusable","update","updates","upgrade","weeks","wrapped","verified","verify") ))$term), collapse = ",")
#print(subset(glb_post_stem_words_terms_df_lst[[txtFeat]], (freq <= 2)))
#glbObsAll[which(terms_mtrx[, 229] > 0), glbFeatsText]
# To identify terms with cor.y == NA
#orderBy(~-freq+term, subset(glb_post_stop_words_terms_df_lst[[txtFeat]], is.na(cor.y)))
#paste(sort(subset(glb_post_stop_words_terms_df_lst[[txtFeat]], is.na(cor.y))[, "term"]), collapse=",")
#orderBy(~-freq+term, subset(glb_post_stem_words_terms_df_lst[[txtFeat]], is.na(cor.y)))
# To identify terms with low cor.y.abs
#head(orderBy(~cor.y.abs+freq+term, subset(glb_post_stem_words_terms_df_lst[[txtFeat]], !is.na(cor.y))), 5)
# To identify terms with high chisq.pval
#subset(glb_post_stem_words_terms_df_lst[[txtFeat]], chisq.pval > 0.99)
#paste0(sort(subset(glb_post_stem_words_terms_df_lst[[txtFeat]], (chisq.pval > 0.99) & (freq <= 10))$term), collapse=",")
#paste0(sort(subset(glb_post_stem_words_terms_df_lst[[txtFeat]], (chisq.pval > 0.9))$term), collapse=",")
#head(orderBy(~-chisq.pval+freq+term, glb_post_stem_words_terms_df_lst[[txtFeat]]), 5)
#glbObsAll[glb_post_stem_words_terms_mtrx_lst[[txtFeat]][, 68] > 0, glbFeatsText]
#orderBy(~term, glb_post_stem_words_terms_df_lst[[txtFeat]][grep("^m", glb_post_stem_words_terms_df_lst[[txtFeat]]$term), ])
# To identify terms with high nzv.freqRatio
#summary(glb_post_stem_words_terms_df_lst[[txtFeat]]$nzv.freqRatio)
#paste0(sort(setdiff(subset(glb_post_stem_words_terms_df_lst[[txtFeat]], (nzv.freqRatio >= glbFeatsNzvFreqMax) & (freq < 10) & (chisq.pval >= 0.05))$term, c( "128gb","3g","4g","gold","ipad1","ipad3","ipad4","ipadair2","ipadmini2","manufactur","spacegray","sprint","tmobil","verizon","wifion"))), collapse=",")
# To identify obs with a txt term
#tail(orderBy(~-freq+term, glb_post_stop_words_terms_df_lst[[txtFeat]]), 20)
#mydspObs(list(descr.my.contains="non"), cols=c("color", "carrier", "cellular", "storage"))
#grep("ever", dimnames(terms_stop_mtrx)$Terms)
#which(terms_stop_mtrx[, grep("ipad", dimnames(terms_stop_mtrx)$Terms)] > 0)
#glbObsAll[which(terms_stop_mtrx[, grep("16", dimnames(terms_stop_mtrx)$Terms)[1]] > 0), c(glbFeatsCategory, "storage", txtFeat)]
# Text Processing Step: screen for names # Move to glbFeatsText specs section in order of text processing steps
# glbFeatsText[["<txtFeat>"]]$names <- myreplacePunctuation(str_to_lower(gsub(" ", "", c(NULL
# # Person names for names screening
# ,<comma-separated-list>
#
# # Company names
# ,<comma-separated-list>
#
# # Product names
# ,<comma-separated-list>
# ))))
# glbFeatsText[["<txtFeat>"]]$rareWords <- myreplacePunctuation(str_to_lower(gsub(" ", "", c(NULL
# # Words not in SCOWL db
# ,<comma-separated-list>
# ))))
# To identify char vectors post glbFeatsTextMap
#grep("six(.*)hour", glb_txt_chr_lst[[txtFeat]], ignore.case = TRUE, value = TRUE)
#grep("[S|s]ix(.*)[H|h]our", glb_txt_chr_lst[[txtFeat]], value = TRUE)
# To identify whether terms shd be synonyms
#orderBy(~term, glb_post_stop_words_terms_df_lst[[txtFeat]][grep("^moder", glb_post_stop_words_terms_df_lst[[txtFeat]]$term), ])
# term_row_df <- glb_post_stop_words_terms_df_lst[[txtFeat]][grep("^came$", glb_post_stop_words_terms_df_lst[[txtFeat]]$term), ]
#
# cor(glb_post_stop_words_terms_mtrx_lst[[txtFeat]][glbObsAll$.lcn == "Fit", term_row_df$pos], glbObsTrn[, glb_rsp_var], use="pairwise.complete.obs")
# To identify which stopped words are "close" to a txt term
#sort(cluster_vars)
# Text Processing Step: stemDocument
# To identify stemmed txt terms
#glb_post_stop_words_terms_df_lst[[txtFeat]][grep("^la$", glb_post_stop_words_terms_df_lst[[txtFeat]]$term), ]
#orderBy(~term, glb_post_stem_words_terms_df_lst[[txtFeat]][grep("^con", glb_post_stem_words_terms_df_lst[[txtFeat]]$term), ])
#glbObsAll[which(terms_stem_mtrx[, grep("use", dimnames(terms_stem_mtrx)$Terms)[[1]]] > 0), c(glbFeatsId, "productline", txtFeat)]
#glbObsAll[which(TfIdf_stem_mtrx[, 191] > 0), c(glbFeatsId, glbFeatsCategory, txtFeat)]
#glbObsAll[which(glb_post_stop_words_terms_mtrx_lst[[txtFeat]][, 6165] > 0), c(glbFeatsId, glbFeatsCategory, txtFeat)]
#which(glbObsAll$UniqueID %in% c(11915, 11926, 12198))
# Text Processing Step: mycombineSynonyms
# To identify which terms are associated with not -> combine "could not" & "couldn't"
#findAssocs(glb_full_DTM_lst[[txtFeat]], "not", 0.05)
# To identify which synonyms should be combined
#orderBy(~term, glb_post_stem_words_terms_df_lst[[txtFeat]][grep("^c", glb_post_stem_words_terms_df_lst[[txtFeat]]$term), ])
chk_comb_cor <- function(syn_lst) {
    # Diagnostic helper: show term statistics before & after combining synonyms.
    # syn_lst: list(word = "<surviving stem>", syns = c(<stems to merge>))
    # Relies on globals: txtFeat, glb_post_stem_words_terms_df_lst,
    # glbFeatsTextCorpus, plus helpers mycombineSynonyms / get_corpus_terms.
    before_df <- subset(glb_post_stem_words_terms_df_lst[[txtFeat]],
                        term %in% syn_lst$syns)
    print(before_df)
    combined_corpus <- tm_map(glbFeatsTextCorpus[[txtFeat]],
                              mycombineSynonyms, list(syn_lst), lazy = FALSE)
    after_df <- subset(get_corpus_terms(combined_corpus),
                       term == syn_lst$word)
    print(after_df)
}
#chk_comb_cor(syn_lst=list(word="cabl", syns=c("cabl", "cord")))
#chk_comb_cor(syn_lst=list(word="damag", syns=c("damag", "dent", "ding")))
#chk_comb_cor(syn_lst=list(word="dent", syns=c("dent", "ding")))
#chk_comb_cor(syn_lst=list(word="use", syns=c("use", "usag")))
# Synonym map per text feature; also parsed to collect
# glbFeatsText[[<txtFeat>]]$vldTerms
glbFeatsTextSynonyms <- list()
# Template (first element is a NULL placeholder so entries can be appended
# with a leading comma):
# glbFeatsTextSynonyms[["Hdln.my"]] <- list(NULL
#     # people in places
#     , list(word = "australia", syns = c("australia", "australian"))
#     , list(word = "italy", syns = c("italy", "Italian"))
#     , list(word = "newyork", syns = c("newyork", "newyorker"))
#     , list(word = "Pakistan", syns = c("Pakistan", "Pakistani"))
#     , list(word = "peru", syns = c("peru", "peruvian"))
#     , list(word = "qatar", syns = c("qatar", "qatari"))
#     , list(word = "scotland", syns = c("scotland", "scotish"))
#     , list(word = "Shanghai", syns = c("Shanghai", "Shanzhai"))
#     , list(word = "venezuela", syns = c("venezuela", "venezuelan"))
#     # companies - needs to be data dependent
#     # - e.g. ensure BNP in this experiment/feat always refers to BNPParibas
#     # general synonyms
#     , list(word = "Create", syns = c("Create", "Creator"))
#     , list(word = "cute", syns = c("cute", "cutest"))
#     , list(word = "Disappear", syns = c("Disappear", "Fadeout"))
#     , list(word = "teach", syns = c("teach", "taught"))
#     , list(word = "theater", syns = c("theater", "theatre", "theatres"))
#     , list(word = "understand", syns = c("understand", "understood"))
#     , list(word = "weak", syns = c("weak", "weaken", "weaker", "weakest"))
#     , list(word = "wealth", syns = c("wealth", "wealthi"))
#     # custom synonyms (phrases)
#     # custom synonyms (names)
#     )
#glbFeatsTextSynonyms[["<txtFeat>"]] <- list(NULL
#     , list(word = "<stem1>", syns = c("<stem1>", "<stem1_2>"))
#     )

# Lower-case the word & syns of every synonym entry so downstream term
# matching is case-insensitive. Uses base tolower() (behaviorally equivalent
# to stringr::str_to_lower() for these ASCII terms).
# Fixes two defects of the previous `for (entryIx in 1:length(...))` loop:
#   - an empty entry list made 1:length() iterate over c(1, 0) and error;
#   - the leading NULL placeholder (see template above) was silently turned
#     into list(word = character(0), syns = character(0)). NULLs are now
#     passed through unchanged.
myNormalizeSynEntries <- function(synEntries) {
    lapply(synEntries, function(entry) {
        if (is.null(entry)) return(NULL)  # template placeholder; keep as-is
        entry$word <- tolower(entry$word)
        entry$syns <- tolower(entry$syns)
        entry
    })
}
glbFeatsTextSynonyms <- lapply(glbFeatsTextSynonyms, myNormalizeSynEntries)
# Seed for reproducibility of text-feature processing steps
glbFeatsTextSeed <- 181
# tm options include: check tm::weightSMART
# Term-weighting & term-selection controls for the tm DocumentTermMatrix;
# the commented alternatives are weightSMART specs tried in prior runs
glb_txt_terms_control <- list( # Gather model performance & run-time stats
# weighting = function(x) weightSMART(x, spec = "nnn")
# weighting = function(x) weightSMART(x, spec = "lnn")
# weighting = function(x) weightSMART(x, spec = "ann")
# weighting = function(x) weightSMART(x, spec = "bnn")
# weighting = function(x) weightSMART(x, spec = "Lnn")
#
weighting = function(x) weightSMART(x, spec = "ltn") # default
# weighting = function(x) weightSMART(x, spec = "lpn")
#
# weighting = function(x) weightSMART(x, spec = "ltc")
#
# weighting = weightBin
# weighting = weightTf
# weighting = weightTfIdf # : default
# termFreq selection criteria across obs: tm default: list(global=c(1, Inf))
, bounds = list(global = c(1, Inf))
# wordLengths selection criteria: tm default: c(3, Inf)
, wordLengths = c(1, Inf)
)
# Variable(s) that text terms are correlated against for term filtering
glb_txt_cor_var <- glb_rsp_var # : default # or c(<feat>)
# select one from c("union.top.val.cor", "top.cor", "top.val", default: "top.chisq", "sparse")
glbFeatsTextFilter <- "top.chisq"
# Max number of terms retained per text feature (named by text feature)
glbFeatsTextTermsMax <- rep(10, length(glbFeatsText)) # :default
names(glbFeatsTextTermsMax) <- names(glbFeatsText)
# Text Processing Step: extractAssoc
# Correlation threshold for term-association extraction; 1 presumably
# disables it — confirm in mydsutils.R
glbFeatsTextAssocCor <- rep(1, length(glbFeatsText)) # :default
names(glbFeatsTextAssocCor) <- names(glbFeatsText)
# Remember to use stemmed terms
glb_important_terms <- list()
# Text Processing Step: extractPatterns (ngrams)
glbFeatsTextPatterns <- list()
#glbFeatsTextPatterns[[<txtFeat>>]] <- list()
#glbFeatsTextPatterns[[<txtFeat>>]] <- c(metropolitan.diary.colon = "Metropolitan Diary:")
# Sparsity thresholds — have to set it even if it is not used
# Properties:
#   numrows(glb_feats_df) << numrows(glbObsFit
#   Select terms that appear in at least 0.2 * O(FP/FN(glbObsOOB)) ???
#   numrows(glbObsOOB) = 1.1 * numrows(glbObsNew) ???
glb_sprs_thresholds <- NULL # or c(<txtFeat1> = 0.988, <txtFeat2> = 0.970, <txtFeat3> = 0.970)
# Max unique values for a factor feature — NOTE(review): presumably features
# above this are lumped/excluded; confirm against mydsutils.R
glbFctrMaxUniqVals <- 20 # default: 20
# Missing-data imputation (mice) toggle & completion seed
glb_impute_na_data <- FALSE # or TRUE
glb_mice_complete.seed <- 144 # or any integer
# Observation-clustering toggle & controls
glb_cluster <- FALSE # : default or TRUE
glb_cluster.seed <- 189 # or any integer
glb_cluster_entropy_var <- NULL # c(glb_rsp_var, as.factor(cut(glb_rsp_var, 3)), default: NULL)
glbFeatsTextClusterVarsExclude <- FALSE # default FALSE
# Features restricted to interaction terms only
glb_interaction_only_feats <- NULL # : default or c(<parent_feat> = "<child_feat>")
# caret::nearZeroVar() thresholds (freqCut / uniqueCut) for feature screening
glbFeatsNzvFreqMax <- 19 # 19 : caret default
glbFeatsNzvUniqMin <- 10 # 10 : caret default
# Recursive-feature-elimination subset sizes, keyed by model family
glbRFESizes <- list()
#glbRFESizes[["mdlFamily"]] <- c(4, 8, 16, 32, 64, 67, 68, 69) # Accuracy@69/70 = 0.8258
# Observation ids to exclude from model fitting, keyed by model family.
glbObsFitOutliers <- list()
# If outliers.n >= 10, consider concatenation of interaction vars.
# Populate from influence diagnostics (.rstudent, .dffits, .hatvalues), e.g.:
# glbObsFitOutliers[["<mdlFamily>"]] <- c(<comma-separated <glbFeatsId> values>)

# Observation ids excluded when refitting on the full training set.
glbObsTrnOutliers <- list()
# "Final" inherits everything flagged for the "All.X" family (none yet, so
# this assignment is currently a no-op: assigning NULL adds no element).
glbObsTrnOutliers[["Final"]] <- union(glbObsFitOutliers[["All.X"]], NULL)
# influence.measures: car::outlier; rstudent; dffits; hatvalues; dfbeta; dfbetas
#mdlId <- "All.X##rcv#glm"; obs_df <- fitobs_df
#mdlId <- "RFE.X.glm"; obs_df <- fitobs_df
#mdlId <- "Final.glm"; obs_df <- trnobs_df
#mdlId <- "CSM2.X.glm"; obs_df <- fitobs_df
#print(outliers <- car::outlierTest(glb_models_lst[[mdlId]]$finalModel))
#mdlIdFamily <- paste0(head(unlist(str_split(mdlId, "\\.")), -1), collapse="."); obs_df <- dplyr::filter_(obs_df, interp(~(!(var %in% glbObsFitOutliers[[mdlIdFamily]])), var = as.name(glbFeatsId))); model_diags_df <- cbind(obs_df, data.frame(.rstudent=stats::rstudent(glb_models_lst[[mdlId]]$finalModel)), data.frame(.dffits=stats::dffits(glb_models_lst[[mdlId]]$finalModel)), data.frame(.hatvalues=stats::hatvalues(glb_models_lst[[mdlId]]$finalModel)));print(summary(model_diags_df[, c(".rstudent",".dffits",".hatvalues")])); table(cut(model_diags_df$.hatvalues, breaks=c(0.00, 0.98, 0.99, 1.00)))
#print(subset(model_diags_df, is.na(.rstudent))[, glbFeatsId])
#print(model_diags_df[which.max(model_diags_df$.rstudent), ])
#print(subset(model_diags_df, is.na(.dffits))[, glbFeatsId])
#print(model_diags_df[which.min(model_diags_df$.dffits), ])
#print(subset(model_diags_df, .hatvalues > 0.99)[, glbFeatsId])
#dffits_df <- merge(dffits_df, outliers_df, by="row.names", all.x=TRUE); row.names(dffits_df) <- dffits_df$Row.names; dffits_df <- subset(dffits_df, select=-Row.names)
#dffits_df <- merge(dffits_df, glbObsFit, by="row.names", all.x=TRUE); row.names(dffits_df) <- dffits_df$Row.names; dffits_df <- subset(dffits_df, select=-Row.names)
#subset(dffits_df, !is.na(.Bonf.p))
#mdlId <- "CSM.X.glm"; vars <- myextract_actual_feats(row.names(orderBy(reformulate(c("-", paste0(mdlId, ".imp"))), myget_feats_imp(glb_models_lst[[mdlId]]))));
#model_diags_df <- glb_get_predictions(model_diags_df, mdlId, glb_rsp_var)
#obs_ix <- row.names(model_diags_df) %in% names(outliers$rstudent)[1]
#obs_ix <- which(is.na(model_diags_df$.rstudent))
#obs_ix <- which(is.na(model_diags_df$.dffits))
#myplot_parcoord(obs_df=model_diags_df[, c(glbFeatsId, glbFeatsCategory, ".rstudent", ".dffits", ".hatvalues", glb_rsp_var, paste0(glb_rsp_var, mdlId), vars[1:min(20, length(vars))])], obs_ix=obs_ix, id_var=glbFeatsId, category_var=glbFeatsCategory)
#model_diags_df[row.names(model_diags_df) %in% names(outliers$rstudent)[c(1:2)], ]
#ctgry_diags_df <- model_diags_df[model_diags_df[, glbFeatsCategory] %in% c("Unknown#0"), ]
#myplot_parcoord(obs_df=ctgry_diags_df[, c(glbFeatsId, glbFeatsCategory, ".rstudent", ".dffits", ".hatvalues", glb_rsp_var, "startprice.log10.predict.RFE.X.glmnet", indep_vars[1:20])], obs_ix=row.names(ctgry_diags_df) %in% names(outliers$rstudent)[1], id_var=glbFeatsId, category_var=glbFeatsCategory)
#table(glbObsFit[model_diags_df[, glbFeatsCategory] %in% c("iPad1#1"), "startprice.log10.cut.fctr"])
#glbObsFit[model_diags_df[, glbFeatsCategory] %in% c("iPad1#1"), c(glbFeatsId, "startprice")]
# No outliers & .dffits == NaN
#myplot_parcoord(obs_df=model_diags_df[, c(glbFeatsId, glbFeatsCategory, glb_rsp_var, "startprice.log10.predict.RFE.X.glmnet", indep_vars[1:10])], obs_ix=seq(1:nrow(model_diags_df))[is.na(model_diags_df$.dffits)], id_var=glbFeatsId, category_var=glbFeatsCategory)
# Modify mdlId to (build & extract) "<FamilyId>#<Fit|Trn>#<caretMethod>#<preProc1.preProc2>#<samplingMethod>"
# Model registry: fitted caret models and their summary stats
glb_models_lst <- list()
glb_models_df <- data.frame()

# Candidate caret methods, chosen by problem type.
# (Relies on globals glb_is_regression / glb_is_binomial set earlier.)
if (glb_is_regression) {
    # Regression
    glbMdlMethods <- c(
        # deterministic
        "glm", "bayesglm", "glmnet",   # "lm" behaves the same as "glm"
        "rpart",
        # non-deterministic
        "gbm", "rf",
        # unknown determinism
        "nnet", "avNNet",       # avNNet: 25 models per cv sample at tunelength = 5
        "svmLinear", "svmLinear2",
        "svmPoly",              # 75 models per cv sample at tunelength = 5
        "svmRadial",
        "earth",
        "bagEarth"              # takes a long time
    )
} else if (glb_is_binomial) {
    # Binomial classification - consider adding ada (auto feature selection)
    glbMdlMethods <- c(
        # deterministic
        "bagEarth",             # takes a long time
        "glm", "bayesglm", "glmnet",
        "nnet",
        "rpart",
        # non-deterministic
        "gbm",
        "avNNet",               # 25 models per cv sample at tunelength = 5
        "rf",
        # unknown determinism
        "lda", "lda2",
        # svm models crash when predict is called -> internal to kernlab it
        # should call predict without .outcome
        "svmLinear", "svmLinear2",
        "svmPoly",              # 75 models per cv sample at tunelength = 5
        "svmRadial",
        "earth"
    )
} else {
    # Multinomial classification
    glbMdlMethods <- c(
        "glmnet",               # deterministic
        "rf",                   # non-deterministic
        "gbm", "rpart"          # unknown determinism
    )
}
# Model-family configuration ----
# family: choose from c("RFE.X", "CSM.X", "All.X", "Best.Interact")
# methods: choose from c(NULL, <method>, glbMdlMethods)
glb_mdl_feats_lst <- list()
glbMdlFamilies <- list()
glbMdlFamilies[["All.X"]] <- c("glmnet", "glm")  # non-NULL vector is mandatory
#glbMdlFamilies[["RFE.X"]] <- c("glmnet", "glm")
#glbMdlFamilies[["Best.Interact"]] <- "glmnet"
# "Final": NULL vector acceptable; assigning c(NULL) adds no element
glbMdlFamilies[["Final"]] <- c(NULL)

# Per-model parallelism overrides, e.g. glbMdlAllowParallel[["<mdlId>"]] <- FALSE
glbMdlAllowParallel <- list()

# Tuning-parameter overrides per mdlId; empty => caret defaults
glbMdlTuneParams <- data.frame()
# Fallback glmnet grid, for when glmnet crashes at model$grid
glmnetTuneParams <- data.frame(
    parameter = c("alpha", "lambda"),
    vals = c("0.100 0.325 0.550 0.775 1.000", "9.342e-02")
)
# Attach it via:
# glbMdlTuneParams <- myrbind_df(glbMdlTuneParams,
#     cbind(data.frame(mdlId = "<mdlId>"), glmnetTuneParams))
# glbMdlTuneParams <- myrbind_df(glbMdlTuneParams,
# cbind(data.frame(mdlId = "<mdlId>"),
# glmnetTuneParams))
#avNNet
# size=[1] 3 5 7 9; decay=[0] 1e-04 0.001 0.01 0.1; bag=[FALSE]; RMSE=1.3300906
#bagEarth
# degree=1 [2] 3; nprune=64 128 256 512 [1024]; RMSE=0.6486663 (up)
# glbMdlTuneParams <- myrbind_df(glbMdlTuneParams, rbind(data.frame()
# ,data.frame(method = "bagEarth", parameter = "nprune", vals = "256")
# ,data.frame(method = "bagEarth", parameter = "degree", vals = "2")
# ))
#earth
# degree=[1]; nprune=2 [9] 17 25 33; RMSE=0.1334478
#gbm
# shrinkage=0.05 [0.10] 0.15 0.20 0.25; n.trees=100 150 200 [250] 300; interaction.depth=[1] 2 3 4 5; n.minobsinnode=[10]; RMSE=0.2008313
# glbMdlTuneParams <- myrbind_df(glbMdlTuneParams, rbind(data.frame()
# ,data.frame(method = "gbm", parameter = "shrinkage", min = 0.05, max = 0.25, by = 0.05)
# ,data.frame(method = "gbm", parameter = "n.trees", min = 100, max = 300, by = 50)
# ,data.frame(method = "gbm", parameter = "interaction.depth", min = 1, max = 5, by = 1)
# ,data.frame(method = "gbm", parameter = "n.minobsinnode", min = 10, max = 10, by = 10)
# #seq(from=0.05, to=0.25, by=0.05)
# ))
#glmnet
# alpha=0.100 [0.325] 0.550 0.775 1.000; lambda=0.0005232693 0.0024288010 0.0112734954 [0.0523269304] 0.2428800957; RMSE=0.6164891
# glbMdlTuneParams <- myrbind_df(glbMdlTuneParams, rbind(data.frame()
# ,data.frame(method = "glmnet", parameter = "alpha", vals = "0.550 0.775 0.8875 0.94375 1.000")
# ,data.frame(method = "glmnet", parameter = "lambda", vals = "9.858855e-05 0.0001971771 0.0009152152 0.0042480525 0.0197177130")
# ))
#nnet
# size=3 5 [7] 9 11; decay=0.0001 0.001 0.01 [0.1] 0.2; RMSE=0.9287422
# glbMdlTuneParams <- myrbind_df(glbMdlTuneParams, rbind(data.frame()
# ,data.frame(method = "nnet", parameter = "size", vals = "3 5 7 9 11")
# ,data.frame(method = "nnet", parameter = "decay", vals = "0.0001 0.0010 0.0100 0.1000 0.2000")
# ))
#rf # Don't bother; results are not deterministic
# mtry=2 35 68 [101] 134; RMSE=0.1339974
# glbMdlTuneParams <- myrbind_df(glbMdlTuneParams, rbind(data.frame()
# ,data.frame(method = "rf", parameter = "mtry", vals = "2 5 9 13 17")
# ))
#rpart
# cp=0.020 [0.025] 0.030 0.035 0.040; RMSE=0.1770237
# glbMdlTuneParams <- myrbind_df(glbMdlTuneParams, rbind(data.frame()
# ,data.frame(method = "rpart", parameter = "cp", vals = "0.004347826 0.008695652 0.017391304 0.021739130 0.034782609")
# ))
#svmLinear
# C=0.01 0.05 [0.10] 0.50 1.00 2.00 3.00 4.00; RMSE=0.1271318; 0.1296718
# glbMdlTuneParams <- myrbind_df(glbMdlTuneParams, rbind(data.frame()
# ,data.frame(method = "svmLinear", parameter = "C", vals = "0.01 0.05 0.1 0.5 1")
# ))
#svmLinear2
# cost=0.0625 0.1250 [0.25] 0.50 1.00; RMSE=0.1276354
# glbMdlTuneParams <- myrbind_df(glbMdlTuneParams, rbind(data.frame()
# ,data.frame(method = "svmLinear2", parameter = "cost", vals = "0.0625 0.125 0.25 0.5 1")
# ))
#svmPoly
# degree=[1] 2 3 4 5; scale=0.01 0.05 [0.1] 0.5 1; C=0.50 1.00 [2.00] 3.00 4.00; RMSE=0.1276130
# glbMdlTuneParams <- myrbind_df(glbMdlTuneParams, rbind(data.frame()
# ,data.frame(method="svmPoly", parameter="degree", min=1, max=5, by=1) #seq(1, 5, 1)
# ,data.frame(method="svmPoly", parameter="scale", vals="0.01, 0.05, 0.1, 0.5, 1")
# ,data.frame(method="svmPoly", parameter="C", vals="0.50, 1.00, 2.00, 3.00, 4.00")
# ))
#svmRadial
# sigma=[0.08674323]; C=0.25 0.50 1.00 [2.00] 4.00; RMSE=0.1614957
#glb2Sav(); all.equal(sav_models_df, glb_models_df)
# caret::preProcess() methods; NULL = none
glb_preproc_methods <- NULL
# choose from c("YeoJohnson", "center.scale", "range", "pca", "ica", "spatialSign")

# Baseline prediction model feature(s); NULL = no baseline model
glb_Baseline_mdl_var <- NULL  # or c("<feat>")

# Custom mis-classification cost matrix; NULL = none, e.g.
# matrix(c(0,1,2,3,4,  2,0,1,2,3,  4,2,0,1,2,  6,4,2,0,1,  8,6,4,2,0),
#        byrow = TRUE, nrow = 5)
glbMdlMetric_terms <- NULL

# Custom caret summary metric: name, direction, and summary function;
# all NULL = caret defaults
glbMdlMetricSummary <- NULL   # or "<metric_name>"
glbMdlMetricMaximize <- NULL  # or FALSE (TRUE is not the default for both classification & regression)
glbMdlMetricSummaryFn <- NULL
# e.g. function(data, lev = NULL, model = NULL) {
#     confusion_mtrx <- t(as.matrix(confusionMatrix(data$pred, data$obs)))
#     metric <- sum(confusion_mtrx * glbMdlMetric_terms) / nrow(data)
#     names(metric) <- glbMdlMetricSummary
#     return(metric)
# }

# Repeated cross-validation controls
glbMdlCheckRcv <- FALSE       # turn on only when needed; otherwise takes long
glb_rcv_n_folds <- 3          # or NULL
glb_rcv_n_repeats <- 3        # or NULL

# Classification probability cutoff; NULL presumably means auto-selected
glb_clf_proba_threshold <- NULL  # e.g. 0.5
glb_clf_proba_threshold <- NULL # 0.5
# Model selection criteria
# Model-selection criteria, in priority order (first = most important)
if (glb_is_regression) {
    glbMdlMetricsEval <- c("min.RMSE.OOB", "max.R.sq.OOB",
                           "max.Adj.R.sq.fit", "min.RMSE.fit")
    # fit-only alternative:
    # glbMdlMetricsEval <- c("min.RMSE.fit", "max.R.sq.fit", "max.Adj.R.sq.fit")
}
if (glb_is_classification) {
    glbMdlMetricsEval <- if (glb_is_binomial) {
        c("max.Accuracy.OOB", "max.AUCROCR.OOB", "max.AUCpROC.OOB",
          "min.aic.fit", "max.Accuracy.fit")
    } else {
        c("max.Accuracy.OOB", "max.Kappa.OOB")
    }
}
# select from NULL [no ensemble models], "auto" [all models better than MFO or Baseline], c(mdl_ids in glb_models_lst) [Typically top-rated models in auto]
# Ensemble spec: NULL = no ensemble models; "auto" = all models beating
# MFO or Baseline; or an explicit c(<mdlIds in glb_models_lst>)
glb_mdl_ensemble <- NULL
# e.g. "%<d-% setdiff(mygetEnsembleAutoMdlIds(), 'CSM.X.rf')"

# Model selected for OOB evaluation, and the one refit on all training data
glb_sel_mdl_id <- "All.X##rcv#glmnet"  # or NULL, "RFE.X##rcv#glmnet", <mdlId>
glb_fin_mdl_id <- NULL                 # or glb_sel_mdl_id

# Columns shown when displaying observations; list critical cols beyond these
glb_dsp_cols <- c(".pos", glbFeatsId, glbFeatsCategory, glb_rsp_var)
# Output specs
# lclgetfltout_df <- function(obsout_df) {
# require(tidyr)
# obsout_df <- obsout_df %>%
# tidyr::separate("ImageId.x.y", c(".src", ".pos", "x", "y"),
# sep = "#", remove = TRUE, extra = "merge")
#
# # dplyr::summarize(xMeanN = mean(as.numeric(x)), yMeanN = mean(as.numeric(y)))
#
# return(fmnout_df)
# }
# Output spec: element [[1]] is a NULL placeholder (kept for the template's
# leading-comma append style); $vars is filled below; $mapFn is optional.
# glbFeatsId will be the first output column, by default.
glbObsOut <- list(NULL, vars = list())
#obsout_df <- savobsout_df
glbObsOut$mapFn <- function(obsout_df) {
    # Map raw predictions to the submission format: one row per business_id
    # with a space-separated "labels" string.
    #
    # Only the "outdoor" label is predicted per observation (from the
    # outdoor.fctr column of obsout_df); every other label is imputed with
    # the rounded-mean level of the corresponding factor in glbObsTrn.
    # Relies on global glbObsTrn (training obs with factor label columns).
    #
    # obsout_df: data frame with at least business_id and outdoor.fctr cols.
    # Returns: data frame with columns business_id, labels.
    set.seed(997)  # kept from original, in case sampled imputation
                   # (rbinom variant, see history) is re-enabled

    # Rounded-mean level of a training factor column, as its label string
    trnMeanLevel <- function(feat) {
        lvls <- levels(glbObsTrn[, feat])
        lvls[round(mean(as.numeric(glbObsTrn[, feat])), 0)]
    }

    # Constant (imputed) labels — identical value for every output row
    imputedFeats <- c("lunch", "dinner", "reserve", "expensive",
                      "liquor", "table", "classy", "kids")
    txfout_df <- obsout_df
    for (feat in imputedFeats)
        txfout_df[[feat]] <- trnMeanLevel(feat)

    # outdoor: per-observation; level "N" -> absent ("-1"), otherwise label "3"
    outdoorLvls <- levels(glbObsTrn[, "outdoor.fctr"])
    txfout_df[["outdoor"]] <-
        ifelse(outdoorLvls[as.numeric(txfout_df[["outdoor.fctr"]])] == "N",
               "-1", "3")

    # Order matters: labels are concatenated in this sequence
    labelFeats <- c("lunch", "dinner", "reserve", "outdoor", "expensive",
                    "liquor", "table", "classy", "kids")
    print("ObsNew output class tables:")
    print(sapply(labelFeats,
                 function(feat) table(txfout_df[, feat], useNA = "ifany")))

    # Append each present label (value != "-1"); paste() supplies the space
    # separators (including a leading space), matching the original format.
    txfout_df$labels <- ""
    for (feat in labelFeats)
        txfout_df$labels <- ifelse(txfout_df[[feat]] != "-1",
                                   paste(txfout_df$labels, txfout_df[[feat]]),
                                   txfout_df$labels)

    txfout_df[, c("business_id", "labels")]
}
#if (!is.null(glbObsOut$mapFn)) obsout_df <- glbObsOut$mapFn(obsout_df); print(head(obsout_df))
# Which observations to write out: NULL defaults to "new"; or "all"/"new"/"trn"
glb_out_obs <- NULL

# Output variables: the response prediction is emitted either way; the two
# branches differ only in their commented templates.
if (glb_is_classification && glb_is_binomial) {
    # Optionally also emit the class probability:
    # glbObsOut$vars[["Proba.Y"]] <-
    #     "%<d-% glbObsNew[, mygetPredictIds(glb_rsp_var, glb_fin_mdl_id)$prob]"
    glbObsOut$vars[[glb_rsp_var]] <-
        "%<d-% glbObsNew[, mygetPredictIds(glb_rsp_var, glb_fin_mdl_id)$value]"
} else {
    # Optionally strip a prefix off the id column:
    # glbObsOut$vars[[glbFeatsId]] <-
    #     "%<d-% as.integer(gsub('Test#', '', glbObsNew[, glbFeatsId]))"
    glbObsOut$vars[[glb_rsp_var]] <-
        "%<d-% glbObsNew[, mygetPredictIds(glb_rsp_var, glb_fin_mdl_id)$value]"
    # Template: impute excluded raw feats with their training means:
    # for (outVar in setdiff(glbFeatsExcludeLcl, glb_rsp_var_raw))
    #     glbObsOut$vars[[outVar]] <-
    #         paste0("%<d-% mean(glbObsAll[, \"", outVar, "\"], na.rm = TRUE)")
}
# glbObsOut$vars[[glb_rsp_var_raw]] <- glb_rsp_var_raw
# glbObsOut$vars[[paste0(head(unlist(strsplit(mygetPredictIds$value, "")), -1), collapse = "")]] <-
# Stacking output filenames; NULL = no stacking (default), e.g.
# c("ebayipads_txt_assoc1_out_bid1_stack.csv")  # manual stack
# c("ebayipads_finmdl_bid1_out_nnet_1.csv")     # universal stack
glbOutStackFnames <- NULL

# Prefix for all output artifacts of this run
glbOut <- list(pfx = "YelpRest_color_outdoor_")
# lclImageSampleSeed <- 129

# Data-viz export; choose from c(NULL, "<projectId>_obsall.csv")
glbOutDataVizFname <- NULL
# Ordered chunk labels for this script; drives per-chunk envir save/load.
glbChunks <- list(labels = c(
    "set_global_options_wd", "set_global_options",
    "import.data", "inspect.data", "scrub.data", "transform.data",
    "extract.features",
    "extract.features.datetime", "extract.features.image",
    "extract.features.price",
    "extract.features.text", "extract.features.string",
    "extract.features.end",
    "manage.missing.data", "cluster.data", "partition.data.training",
    "select.features",
    "fit.models_0", "fit.models_1", "fit.models_2", "fit.models_3",
    "fit.data.training_0", "fit.data.training_1",
    "predict.data.new",
    "display.session.info"
))
# Cross-check that every chunk in this script is listed above.
# (knitr::all_labels() returns nothing useful in console runs.)
chkChunksLabels <- knitr::all_labels()
if (!is.null(chkChunksLabels) && !identical(chkChunksLabels, glbChunks$labels)) {
    print(sprintf("setdiff(chkChunksLabels, glbChunks$labels): %s",
                  setdiff(chkChunksLabels, glbChunks$labels)))
    print(sprintf("setdiff(glbChunks$labels, chkChunksLabels): %s",
                  setdiff(glbChunks$labels, chkChunksLabels)))
}
glbChunks[["first"]] <- NULL  # default: script will load envir from previous chunk
glbChunks[["last"]] <- NULL   # default: script will save envir at end of this chunk
                              # e.g. "extract.features.end"
#mysavChunk(glbOut$pfx, glbChunks[["last"]])
# Inspect max OOB FP
#chkObsOOB <- subset(glbObsOOB, !label.fctr.All.X..rcv.glmnet.is.acc)
#chkObsOOBFP <- subset(chkObsOOB, label.fctr.All.X..rcv.glmnet == "left_eye_center") %>% dplyr::mutate(Probability1 = label.fctr.All.X..rcv.glmnet.prob) %>% select(-.src, -.pos, -x, -y) %>% lclgetfltout_df() %>% mutate(obj.distance = (((as.numeric(x) - left_eye_center_x.int) ^ 2) + ((as.numeric(y) - left_eye_center_y.int) ^ 2)) ^ 0.5) %>% dplyr::top_n(5, obj.distance) %>% dplyr::top_n(5, -patch.cor)
#
#newImgObs <- glbObsNew[(glbObsNew$ImageId == "Test#0001"), ]; print(newImgObs[which.max(newImgObs$label.fctr.Final..rcv.glmnet.prob), ])
#OOBImgObs <- glbObsOOB[(glbObsOOB$ImageId == "Train#0003"), ]; print(OOBImgObs[which.max(OOBImgObs$label.fctr.All.X..rcv.glmnet.prob), ])
#load("<scriptName>_extract.features.end.RData", verbose = TRUE)
#mygetImage(which(glbObsAll[, glbFeatsId] == "Train#0003"), names(glbFeatsImage)[1], plot = TRUE, featHighlight = c("left_eye_center_x", "left_eye_center_y"), ovrlHighlight = c(66, 35))
# Depict process
# Depict the analytics process as a Petri net (petrinet()/ggplot.petrinet()
# come from mypetrinet.R, sourced at the top of the script).
glb_analytics_pn <- local({
    transitions_df <- data.frame(
        id = 1:6,
        name = c("data.training.all", "data.new",
                 "model.selected", "model.final",
                 "data.training.all.prediction", "data.new.prediction"),
        x = c(-5, -5, -15, -25, -25, -35),
        y = c(-5, 5, 0, 0, -5, 5))
    places_df <- data.frame(
        id = 1:4,
        name = c("bgn", "fit.data.training.all", "predict.data.new", "end"),
        x = c(0, -20, -30, -40),
        y = c(0, 0, 0, 0),
        M0 = c(3, 0, 0, 0))  # M0: presumably the initial marking (3 at "bgn")
    arcs_df <- data.frame(
        begin = c("bgn", "bgn", "bgn",
                  "data.training.all", "model.selected",
                  "fit.data.training.all",
                  "fit.data.training.all", "model.final",
                  "data.new", "predict.data.new",
                  "data.training.all.prediction", "data.new.prediction"),
        end = c("data.training.all", "data.new", "model.selected",
                "fit.data.training.all", "fit.data.training.all",
                "model.final",
                "data.training.all.prediction", "predict.data.new",
                "predict.data.new", "data.new.prediction",
                "end", "end"))
    petrinet(name = "glb_analytics_pn", trans_df = transitions_df,
             places_df = places_df, arcs_df = arcs_df)
})
#print(ggplot.petrinet(glb_analytics_pn))
print(ggplot.petrinet(glb_analytics_pn) + coord_flip())
## Loading required package: grid
# Start the chunk-timing log with the first pipeline step.
glb_chunks_df <- myadd_chunk(NULL, "import.data")
# No analytics artifacts have been materialized yet.
glb_analytics_avl_objs <- NULL
## label step_major step_minor label_minor bgn end elapsed
## 1 import.data 1 0 0 9.012 NA NA
## 1.0: import data
## [1] "Reading file ./data/train_color.csv..."
## [1] "dimensions of data in ./data/train_color.csv: 2,000 rows x 18 cols"
## [1] " Truncating imgResXLst to first 100 chars..."
## [1] " Truncating imgResYLst to first 100 chars..."
## [1] " Truncating imgResXYLst to first 100 chars..."
## [1] " Truncating imgLumR.meanLst to first 100 chars..."
## [1] " Truncating imgLumR.madLst to first 100 chars..."
## [1] " Truncating imgLumB.meanLst to first 100 chars..."
## [1] " Truncating imgLumB.madLst to first 100 chars..."
## [1] " Truncating imgLumG.meanLst to first 100 chars..."
## [1] " Truncating imgLumG.madLst to first 100 chars..."
## [1] " Truncating imgCorRBLst to first 100 chars..."
## [1] " Truncating imgCorBGLst to first 100 chars..."
## [1] " Truncating imgCorGRLst to first 100 chars..."
## [1] " Truncating imgCosSmlRBLst to first 100 chars..."
## [1] " Truncating imgCosSmlBGLst to first 100 chars..."
## [1] " Truncating imgCosSmlGRLst to first 100 chars..."
## business_id labels nImgs
## 1 1000 1 2 3 4 5 6 7 54
## 2 1001 0 1 6 8 9
## 3 100 1 2 4 5 6 7 84
## 4 1006 1 2 4 5 6 22
## 5 1010 0 6 8 11
## 6 101 1 2 3 4 5 6 121
## imgResXLst
## 1 500,375,375,375,375,375,500,500,500,500,500,500,500,500,375,414,373,500,399,375,375,375,500,500,472,
## 2 500,375,500,500,500,366,358,444,500
## 3 500,375,375,375,375,500,375,375,500,375,373,375,375,500,375,500,500,500,500,375,375,375,375,375,375,
## 4 500,373,281,500,500,500,500,500,500,500,500,396,500,500,500,281,281,375,375,375,375,375
## 5 375,500,375,500,500,500,500,375,500,500,500
## 6 375,299,299,299,299,299,299,373,373,373,373,500,500,408,500,500,500,500,375,500,373,500,500,375,375,
## imgResYLst
## 1 500,500,500,500,500,500,332,332,332,332,332,375,375,375,500,500,500,389,500,500,500,500,375,375,500,
## 2 375,500,375,361,375,500,500,479,373
## 3 375,500,500,500,500,375,500,500,268,500,500,500,500,375,500,375,375,375,375,500,500,500,500,500,500,
## 4 375,500,500,273,375,375,375,375,375,399,290,500,500,500,375,500,500,500,500,500,500,500
## 5 500,375,500,375,375,375,375,500,375,375,375
## 6 500,500,500,500,500,500,500,500,500,500,500,282,282,306,388,375,375,375,500,373,500,348,386,500,500,
## imgResXYLst
## 1 250000,187500,187500,187500,187500,187500,166000,166000,166000,166000,166000,187500,187500,187500,18
## 2 187500,187500,187500,180500,187500,183000,179000,212676,186500
## 3 187500,187500,187500,187500,187500,187500,187500,187500,134000,187500,186500,187500,187500,187500,18
## 4 187500,186500,140500,136500,187500,187500,187500,187500,187500,199500,145000,198000,250000,250000,18
## 5 187500,187500,187500,187500,187500,187500,187500,187500,187500,187500,187500
## 6 187500,149500,149500,149500,149500,149500,149500,186500,186500,186500,186500,141000,141000,124848,19
## imgLumR.meanLst
## 1 0.470262839215686,0.314501103267974,0.373570049673203,0.435050499346405,0.471514373856209,0.52830353
## 2 0.62211543006536,0.381729024836601,0.515106467973856,0.543710759871816,0.481847780392157,0.443394792
## 3 0.554903968627451,0.333421218300654,0.273489254901961,0.53425428496732,0.290379210457516,0.268196162
## 4 0.498694567320261,0.238366125216843,0.420795199218477,0.391512317747612,0.146110870588235,0.62251678
## 5 0.308927351633987,0.690016020915033,0.601689976470588,0.580284402614379,0.643895048366013,0.66549749
## 6 0.407443932026144,0.424256672568693,0.417117371630927,0.418903036264673,0.563773283494,0.41503403501
## imgLumR.madLst
## 1 0.308148235294118,0.145352941176471,0.162795294117647,0.313962352941176,0.122096470588235,0.12209647
## 2 0.261635294117647,0.180237647058823,0.226750588235294,0.267449411764706,0.308148235294118,0.31396235
## 3 0.0930258823529412,0.290705882352941,0.191865882352941,0.215122352941177,0.104654117647059,0.2034941
## 4 0.348847058823529,0.0930258823529412,0.232564705882353,0.203494117647059,0.0348847058823529,0.133724
## 5 0.139538823529412,0.186051764705882,0.209308235294118,0.203494117647059,0.0930258823529412,0.19768,0
## 6 0.191865882352941,0.244192941176471,0.261635294117647,0.168609411764706,0.255821176470588,0.26163529
## imgLumB.meanLst
## 1 0.400086839215686,0.281435168627451,0.352887864052288,0.391357783006536,0.431559173856209,0.42215295
## 2 0.40706708496732,0.340096773856209,0.463498896732026,0.406464005214274,0.341465432679739,0.417969720
## 3 0.549200501960784,0.24384842875817,0.202027189542484,0.379692507189542,0.172298311111111,0.168613291
## 4 0.441220977777778,0.175660390054145,0.393309106133557,0.223112949795303,0.119968815686275,0.62076923
## 5 0.244836329411765,0.572361558169935,0.452952366013072,0.446202938562091,0.588746206535948,0.53801562
## 6 0.302397992156863,0.412772929372418,0.401205010164601,0.378327864122238,0.389472935930225,0.27080076
## imgLumB.madLst
## 1 0.319776470588235,0.145352941176471,0.156981176470588,0.29652,0.151167058823529,0.139538823529412,0.
## 2 0.313962352941176,0.156981176470588,0.302334117647059,0.151167058823529,0.186051764705882,0.26744941
## 3 0.104654117647059,0.19768,0.0872117647058823,0.267449411764706,0.104654117647059,0.145352941176471,0
## 4 0.29652,0.0639552941176471,0.209308235294118,0.0813976470588235,0.0232564705882353,0.122096470588235
## 5 0.104654117647059,0.250007058823529,0.232564705882353,0.168609411764706,0.0813976470588236,0.2325647
## 6 0.250007058823529,0.290705882352941,0.302334117647059,0.209308235294118,0.424430588235294,0.21512235
## imgLumG.meanLst
## 1 0.380454243137255,0.246817840522876,0.314116852287582,0.363030546405229,0.427195963398693,0.38872991
## 2 0.279726452287582,0.317789532026144,0.421921568627451,0.433254011188963,0.252531011764706,0.38298002
## 3 0.503275879738562,0.168192564705882,0.156450175163399,0.279168690196078,0.113039288888889,0.10869111
## 4 0.296656836601307,0.13043553593019,0.354685172004745,0.170870473317532,0.0825161202614379,0.42325011
## 5 0.202443879738562,0.408847728104575,0.303654671895425,0.334231759477124,0.524265328104575,0.36137269
## 6 0.235238107189542,0.313427214899338,0.315523745819398,0.264700452488688,0.390862640173126,0.13762017
## imgLumG.madLst
## 1 0.232564705882353,0.127910588235294,0.145352941176471,0.250007058823529,0.186051764705882,0.14535294
## 2 0.226750588235294,0.139538823529412,0.430244705882353,0.244192941176471,0.116282352941176,0.28489176
## 3 0.0930258823529412,0.104654117647059,0.0639552941176471,0.255821176470588,0.0872117647058823,0.08721
## 4 0.151167058823529,0.0406988235294118,0.186051764705882,0.0639552941176471,0.0232564705882353,0.11046
## 5 0.0813976470588235,0.337218823529412,0.162795294117647,0.122096470588235,0.0523270588235296,0.279077
## 6 0.215122352941177,0.418616470588235,0.354661176470588,0.255821176470588,0.372103529411765,0.06395529
## imgCorRBLst
## 1 0.970296012735957,0.980985682842824,0.972093026685952,0.983724333941815,0.926027872315143,0.95996940
## 2 0.89794420182504,0.963494559422061,0.919773168736845,0.69235792837934,0.798992871092365,0.9736688625
## 3 0.88980511332014,0.982156271657673,0.986750578890926,0.968016666545741,0.904980631748477,0.935874086
## 4 0.934801931155002,0.935308855763743,0.978877671329745,0.91944404028056,0.977686193239092,0.992738523
## 5 0.953268882045949,0.896524085834796,0.943186524115846,0.905422287806635,0.951672969296612,0.89600712
## 6 0.94600768666589,0.97996279191433,0.962868692950767,0.969850894428032,0.757677316058063,0.9703135979
## imgCorBGLst
## 1 0.927947240419192,0.981032496136578,0.96955277428845,0.955336224085597,0.915268062088783,0.988013840
## 2 0.927140641328733,0.970350055669286,0.920977235836244,0.867252069661227,0.902914133624919,0.98229073
## 3 0.95460533534165,0.96530200316397,0.941724560255717,0.974498937299788,0.955756380404291,0.9677352722
## 4 0.87106377293215,0.962551893540078,0.98976645020501,0.987436732442999,0.965603192699164,0.8927327877
## 5 0.887466241845615,0.92670807569305,0.896832194480426,0.933745458476825,0.949333538913154,0.893627762
## 6 0.9871522726707,0.932063125304967,0.961441605438725,0.974698436606718,0.870489705878867,0.9525350846
## imgCorGRLst
## 1 0.85219702103209,0.944813172951291,0.920251569196133,0.913370447315881,0.728953194085978,0.941790427
## 2 0.808981100884253,0.88550936318431,0.762637574605669,0.376703880334306,0.578244046412377,0.941295772
## 3 0.847621407193241,0.92694807051355,0.937592397476484,0.926798909689827,0.820502357384701,0.877048066
## 4 0.79162264641504,0.892536640871731,0.962771424520778,0.882261983517338,0.909919127928424,0.892942677
## 5 0.727279485216904,0.764056996214013,0.777831573896037,0.78088607187468,0.885314564219542,0.698464544
## 6 0.939578446031527,0.925955472045309,0.886112385015734,0.914342530336461,0.907608345597467,0.90372975
## imgCosSmlRBLst
## 1 0.961120503189668,0.959983778123168,0.971567089324108,0.979125620481285,0.968697673098018,0.98854247
## 2 0.915274523521204,0.971958916855887,0.919835219182328,0.884919588642168,0.910722614004215,0.97942304
## 3 0.993414744197245,0.958187824846168,0.964877716984537,0.973537472575526,0.946477604939744,0.94920268
## 4 0.919247057411873,0.915336561758393,0.991620853519786,0.867311395735846,0.936240775424546,0.97745149
## 5 0.8608334116731,0.929339713368627,0.910434533955791,0.921482382603236,0.995931928356368,0.9122570357
## 6 0.980194796013886,0.959499973217363,0.941211660915583,0.931331791339104,0.955432284276881,0.89439310
## imgCosSmlBGLst
## 1 0.97522013067789,0.986144273136297,0.988225607138478,0.988870895735175,0.989438414537079,0.997178834
## 2 0.970619992118104,0.992501438726135,0.969513978027899,0.974530959738499,0.979722839640874,0.99366170
## 3 0.997592656674784,0.981651052987919,0.969675681233784,0.993058742872205,0.985807813173355,0.98655341
## 4 0.946268118128064,0.971513086772763,0.997785703330826,0.983580476140314,0.974096277906093,0.98605329
## 5 0.939634138571773,0.970886470789747,0.952626164808297,0.969001933381393,0.998055368978492,0.95827487
## 6 0.995740103974801,0.966934165622831,0.972355601845161,0.968327122440442,0.948554604719041,0.94121102
## imgCosSmlGRLst
## 1 0.961120503189668,0.959983778123168,0.971567089324108,0.979125620481285,0.968697673098018,0.98854247
## 2 0.915274523521204,0.971958916855887,0.919835219182328,0.884919588642168,0.910722614004215,0.97942304
## 3 0.993414744197245,0.958187824846168,0.964877716984537,0.973537472575526,0.946477604939744,0.94920268
## 4 0.919247057411873,0.915336561758393,0.991620853519786,0.867311395735846,0.936240775424546,0.97745149
## 5 0.8608334116731,0.929339713368627,0.910434533955791,0.921482382603236,0.995931928356368,0.9122570357
## 6 0.980194796013886,0.959499973217363,0.941211660915583,0.931331791339104,0.955432284276881,0.89439310
## business_id labels nImgs
## 69 1102 6 8 37
## 305 1479 0 3 8 306
## 1019 2829 0 2 3 8 104
## 1455 3650 8 42
## 1468 3675 1 2 3 4 5 6 7 32
## 1978 959 3 5 6 8 29
## imgResXLst
## 69 375,500,375,500,375,373,500,452,468,500,500,500,500,500,375,500,500,500,500,500,500,500,373,500,500,
## 305 500,373,373,500,500,500,500,373,375,500,500,500,500,373,373,373,375,500,375,373,373,375,373,500,500,
## 1019 500,375,375,500,500,375,375,375,500,373,375,375,500,375,500,375,375,375,374,375,500,375,375,500,375,
## 1455 375,375,375,375,375,375,375,500,375,500,500,375,375,375,500,375,500,375,500,373,375,500,500,375,281,
## 1468 156,375,500,500,500,500,500,500,500,375,375,375,500,500,500,375,500,375,375,500,500,375,441,500,433,
## 1978 500,500,375,500,500,374,373,373,373,500,500,373,500,375,500,500,375,375,500,375,375,500,375,500,500,
## imgResYLst
## 69 500,375,500,375,500,500,373,500,500,375,375,375,375,332,500,375,375,375,375,375,375,373,500,373,500,
## 305 373,500,500,373,500,500,500,500,500,375,375,375,375,500,500,500,500,299,500,500,500,500,500,331,375,
## 1019 500,500,500,375,375,500,500,500,375,500,500,500,375,500,375,500,500,500,500,500,500,500,500,375,500,
## 1455 500,500,500,500,500,500,500,375,500,375,281,500,500,500,376,500,376,500,375,500,500,376,376,500,500,
## 1468 121,500,340,500,375,375,375,375,375,500,500,500,375,375,375,500,375,500,500,375,283,500,500,294,500,
## 1978 373,373,500,375,375,500,500,500,500,375,373,500,281,500,375,375,500,500,375,500,500,373,500,375,375,
## imgResXYLst
## 69 187500,187500,187500,187500,187500,186500,186500,226000,234000,187500,187500,187500,187500,166000,18
## 305 186500,186500,186500,186500,250000,250000,250000,186500,187500,187500,187500,187500,187500,186500,18
## 1019 250000,187500,187500,187500,187500,187500,187500,187500,187500,186500,187500,187500,187500,187500,18
## 1455 187500,187500,187500,187500,187500,187500,187500,187500,187500,187500,140500,187500,187500,187500,18
## 1468 18876,187500,170000,250000,187500,187500,187500,187500,187500,187500,187500,187500,187500,187500,187
## 1978 186500,186500,187500,187500,187500,187000,186500,186500,186500,187500,186500,186500,140500,187500,18
## imgLumR.meanLst
## 69 0.630319100653595,0.434767037908497,0.578724894117647,0.608180894117647,0.479252245751634,0.40997171
## 305 0.512167607632866,0.751418220049414,0.730782442306681,0.250185775114335,0.479079811764706,0.52160531
## 1019 0.413653349019608,0.475505338562092,0.75285299869281,0.722328679738562,0.639082478431373,0.761316852
## 1455 0.425864554248366,0.829548758169935,0.392219210457516,0.625947419607843,0.504728909803922,0.50392686
## 1468 0.83060718247884,0.454624815686274,0.353021499423299,0.48143582745098,0.462115158169935,0.4611953359
## 1978 0.776167544551333,0.80078271565999,0.63104148496732,0.628821061437909,0.456997960784314,0.4166819754
## imgLumR.madLst
## 69 0.273263529411765,0.290705882352941,0.19768,0.127910588235294,0.284891764705882,0.308148235294118,0.
## 305 0.223843529411765,0.232564705882353,0.279077647058823,0.0930258823529412,0.534898823529412,0.5000141
## 1019 0.325590588235294,0.325590588235294,0.122096470588235,0.191865882352941,0.383731764705882,0.13372470
## 1455 0.186051764705882,0.0406988235294118,0.180237647058824,0.377917647058823,0.261635294117647,0.2093082
## 1468 0.0116282352941176,0.360475294117647,0.19768,0.290705882352941,0.284891764705882,0.354661176470588,0
## 1978 0.145352941176471,0.127910588235294,0.250007058823529,0.162795294117647,0.174423529411765,0.25000705
## imgLumB.meanLst
## 69 0.504768878431373,0.375535560784314,0.468197793464052,0.573440020915033,0.393539952941176,0.40012849
## 305 0.485048078641644,0.430985985386112,0.405060821111286,0.266690953056826,0.4361336,0.528504941176471,
## 1019 0.289846164705882,0.38000062745098,0.673006264052288,0.649674248366013,0.540194509803922,0.628933186
## 1455 0.365380852287582,0.826025830065359,0.412133521568627,0.500160292810457,0.39467028496732,0.406488847
## 1468 0.884184502366321,0.297688867973856,0.235914094579008,0.423714650980392,0.465319048366013,0.43651630
## 1978 0.409592409188877,0.374691058192714,0.458103550326797,0.583654065359477,0.409570698039216,0.42318553
## imgLumB.madLst
## 69 0.255821176470588,0.226750588235294,0.244192941176471,0.145352941176471,0.273263529411765,0.25582117
## 305 0.220936470588235,0.284891764705882,0.232564705882353,0.0930258823529412,0.430244705882353,0.5232705
## 1019 0.186051764705882,0.377917647058824,0.215122352941176,0.255821176470588,0.453501176470588,0.38373176
## 1455 0.191865882352941,0.0581411764705882,0.186051764705882,0.337218823529412,0.267449411764706,0.2034941
## 1468 0.0058141176470588,0.226750588235294,0.110468235294118,0.348847058823529,0.290705882352941,0.3546611
## 1978 0.284891764705882,0.29652,0.261635294117647,0.19768,0.354661176470588,0.209308235294118,0.2790776470
## imgLumG.meanLst
## 69 0.348071236601307,0.238894180392157,0.368137976470588,0.509629511111111,0.333012538562092,0.33844014
## 305 0.433452094832571,0.0415670504126584,0.0416423697629186,0.265246680334332,0.410277835294118,0.497448
## 1019 0.300140658823529,0.302639435294118,0.6583392,0.567931461437909,0.475432888888889,0.593615874509804,
## 1455 0.320086671895425,0.714237803921569,0.421558922875817,0.384628308496732,0.309876475816993,0.29956486
## 1468 0.922914667032314,0.195793652287582,0.155371095732411,0.458351545098039,0.450472596078431,0.41863140
## 1978 0.383613099931662,0.425149681963938,0.376852873202614,0.526616930718954,0.35237845751634,0.387384817
## imgLumG.madLst
## 69 0.313962352941176,0.127910588235294,0.308148235294118,0.162795294117647,0.220936470588235,0.23837882
## 305 0.279077647058823,0.0348847058823529,0.0348847058823529,0.122096470588235,0.261635294117647,0.488385
## 1019 0.191865882352941,0.232564705882353,0.244192941176471,0.348847058823529,0.354661176470588,0.45931529
## 1455 0.203494117647059,0.255821176470588,0.191865882352941,0.244192941176471,0.319776470588235,0.18605176
## 1468 0.0058141176470588,0.0930258823529412,0.0581411764705882,0.383731764705882,0.302334117647059,0.37210
## 1978 0.29652,0.302334117647059,0.302334117647059,0.273263529411765,0.424430588235294,0.156981176470588,0.
## imgCorRBLst
## 69 0.775784863461555,0.975392925473354,0.970575786385015,0.811122919169779,0.96838004089326,0.893383243
## 305 0.916705460321728,0.923432054198346,0.962236606414598,0.949611784945838,0.942974381320465,0.92762095
## 1019 0.770133000192954,0.968433680319795,0.885812895183246,0.964964833157131,0.967809705916519,0.69844714
## 1455 0.923422686665924,0.946115377350327,0.994733205272001,0.920409867719783,0.946535396257377,0.90181467
## 1468 0.937322453638541,0.858389489452281,0.938955693496779,0.970476303798703,0.990952423950598,0.92778718
## 1978 -0.0281327784047273,0.0552334348143238,0.855139882101446,0.937737441618445,0.769514359463716,0.95170
## imgCorBGLst
## 69 0.821536990566542,0.87172237002779,0.944022006048227,0.897846519298661,0.925754752867085,0.886014818
## 305 0.940718474664383,0.258963974614282,0.351939978024315,0.95601340317891,0.784762932627725,0.951470421
## 1019 0.945080145647578,0.947626158164648,0.987941458273341,0.968079262870157,0.974517647263913,0.92991015
## 1455 0.881083583201864,0.861778923085294,0.991788698466626,0.868937086578923,0.842140787204594,0.92277118
## 1468 0.917352611282381,0.882404765003618,0.918995281746173,0.984984393899912,0.967729403676817,0.96463904
## 1978 0.88408136845642,0.915330690152821,0.923467373561779,0.958075745223627,0.952846624335354,0.931984938
## imgCorGRLst
## 69 0.420512042868083,0.803322278424333,0.865470538104111,0.531903753095919,0.840090716670252,0.80078060
## 305 0.793532270981144,0.211922775984,0.352085826224826,0.881196301710926,0.579331278644826,0.86354192168
## 1019 0.783908565922002,0.861862725549643,0.877251518988349,0.883934553926512,0.90174161710988,0.615518358
## 1455 0.716769721003064,0.751193713079078,0.982225673388848,0.649512451448811,0.684086680436353,0.73314103
## 1468 0.796134113194056,0.633167478221707,0.784971188280101,0.951917242808864,0.943965873759617,0.84340955
## 1978 -0.0199378557564711,0.0371169902642393,0.649659179311548,0.919669512024494,0.59223295989867,0.874882
## imgCosSmlRBLst
## 69 0.864139125817003,0.924424486237963,0.93669622133763,0.944454871331937,0.939907906345332,0.921700634
## 305 0.952458077338815,0.66833043714413,0.730183771000588,0.929718320063934,0.867074722633519,0.956265659
## 1019 0.916056463825519,0.917325766621219,0.980692295979401,0.96391632656206,0.957168451959091,0.934739247
## 1455 0.943997914573226,0.974271556910883,0.995659444283183,0.920484789333557,0.890894164187417,0.92283008
## 1468 0.969622573860326,0.789297544009653,0.902466600310012,0.979104086019529,0.986762426422017,0.95413899
## 1978 0.810369469605905,0.823407050848508,0.915119902951158,0.981189725903386,0.860012239305377,0.96412152
## imgCosSmlBGLst
## 69 0.947793796028497,0.949052292586179,0.976549786800773,0.986646851750891,0.972202942635765,0.95403227
## 305 0.982901244200365,0.669884256785878,0.729905020730947,0.974843522189901,0.935488129310655,0.98371063
## 1019 0.974389766695079,0.969617697949604,0.998201057326754,0.989427997946847,0.989608713219376,0.98642148
## 1455 0.974460273334725,0.984203931214097,0.99822952729348,0.967899235931948,0.941966574342945,0.972491513
## 1468 0.993620705243786,0.923659056280079,0.960562492724143,0.995546862990142,0.992189843227971,0.98854775
## 1978 0.968806956581186,0.975986056211238,0.977545312031002,0.991282602000925,0.977887283279781,0.98101038
## imgCosSmlGRLst
## 69 0.864139125817003,0.924424486237963,0.93669622133763,0.944454871331937,0.939907906345332,0.921700634
## 305 0.952458077338815,0.66833043714413,0.730183771000588,0.929718320063934,0.867074722633519,0.956265659
## 1019 0.916056463825519,0.917325766621219,0.980692295979401,0.96391632656206,0.957168451959091,0.934739247
## 1455 0.943997914573226,0.974271556910883,0.995659444283183,0.920484789333557,0.890894164187417,0.92283008
## 1468 0.969622573860326,0.789297544009653,0.902466600310012,0.979104086019529,0.986762426422017,0.95413899
## 1978 0.810369469605905,0.823407050848508,0.915119902951158,0.981189725903386,0.860012239305377,0.96412152
## business_id labels nImgs
## 1995 99 1 2 4 5 6 7 139
## 1996 991 1 2 3 5 6 7 84
## 1997 993 3 6 8 34
## 1998 997 8 107
## 1999 998 1 2 4 5 6 7 320
## 2000 999 1 2 5 6 7 33
## imgResXLst
## 1995 500,500,500,500,373,373,500,500,500,500,500,500,500,500,375,500,500,375,500,375,375,375,375,500,375,
## 1996 500,373,500,375,375,375,375,375,375,375,415,500,500,352,480,375,500,500,281,500,500,500,500,500,500,
## 1997 500,500,500,500,500,299,500,500,375,500,500,500,375,375,375,375,375,500,500,500,375,500,500,373,500,
## 1998 320,376,375,500,500,500,467,467,500,500,500,500,500,500,500,500,500,500,500,500,282,375,500,500,500,
## 1999 500,500,500,375,500,500,500,500,500,375,500,500,500,500,500,375,500,500,500,500,375,375,375,375,375,
## 2000 375,500,500,500,500,500,500,500,375,375,375,299,232,500,500,500,500,500,500,500,500,500,375,500,500,
## imgResYLst
## 1995 375,500,500,500,500,500,375,375,375,375,375,375,375,375,500,375,375,500,375,500,500,500,500,500,500,
## 1996 500,500,373,500,500,500,500,500,500,500,499,387,375,500,360,500,375,500,500,375,373,373,373,373,373,
## 1997 373,375,375,375,299,500,299,375,500,375,375,375,500,500,500,500,500,373,280,280,500,281,373,500,373,
## 1998 240,500,500,414,375,375,351,351,373,373,373,336,336,336,336,343,336,337,337,343,500,500,500,500,500,
## 1999 450,279,281,500,375,375,373,282,375,500,375,340,375,375,375,500,373,373,373,373,500,500,500,500,500,
## 2000 500,375,375,375,375,375,375,375,500,500,500,500,64,281,281,281,375,375,375,375,375,375,500,500,500,5
## imgResXYLst
## 1995 187500,250000,250000,250000,186500,186500,187500,187500,187500,187500,187500,187500,187500,187500,18
## 1996 250000,186500,186500,187500,187500,187500,187500,187500,187500,187500,207085,193500,187500,176000,17
## 1997 186500,187500,187500,187500,149500,149500,149500,187500,187500,187500,187500,187500,187500,187500,18
## 1998 76800,188000,187500,207000,187500,187500,163917,163917,186500,186500,186500,168000,168000,168000,168
## 1999 225000,139500,140500,187500,187500,187500,186500,141000,187500,187500,187500,170000,187500,187500,18
## 2000 187500,187500,187500,187500,187500,187500,187500,187500,187500,187500,187500,149500,14848,140500,140
## imgLumR.meanLst
## 1995 0.609013291503268,0.671996549019608,0.664975231372549,0.615952847058823,0.544654197550334,0.75567107
## 1996 0.303500956862745,0.500386542606319,0.497328349892236,0.510048041830065,0.35996394248366,0.193090949
## 1997 0.51617616569416,0.704037835294118,0.364285992156863,0.298759383006536,0.481318801232868,0.659600236
## 1998 0.492723549836601,0.720716770963705,0.488756413071895,0.637316832433456,0.576713809150327,0.29193507
## 1999 0.415709978213508,0.383960952983344,0.363882911171586,0.514305338562091,0.468592711111111,0.75511418
## 2000 0.606548873202614,0.794814138562091,0.667672575163399,0.398700277124183,0.510849150326797,0.40605502
## imgLumR.madLst
## 1995 0.377917647058823,0.186051764705882,0.220936470588235,0.238378823529412,0.354661176470588,0.11046823
## 1996 0.186051764705882,0.337218823529412,0.308148235294118,0.34303294117647,0.279077647058823,0.104654117
## 1997 0.19768,0.261635294117647,0.308148235294118,0.238378823529412,0.581411764705882,0.313962352941176,0.
## 1998 0.220936470588235,0.19768,0.383731764705882,0.244192941176471,0.372103529411765,0.156981176470588,0.
## 1999 0.279077647058823,0.203494117647059,0.19768,0.215122352941176,0.331404705882353,0.122096470588235,0.
## 2000 0.238378823529412,0.116282352941176,0.203494117647059,0.343032941176471,0.191865882352941,0.23256470
## imgLumB.meanLst
## 1995 0.421399905882353,0.623848015686274,0.59892302745098,0.595149474509804,0.524482994270094,0.788759522
## 1996 0.129238949019608,0.489229438048678,0.430618409294013,0.512121809150327,0.373641160784314,0.15467695
## 1997 0.393917720653945,0.534579492810457,0.264770154248366,0.233929098039216,0.341323470391501,0.42813343
## 1998 0.470469515931373,0.566040571547768,0.439761924183007,0.442647115657857,0.498884956862745,0.26991215
## 1999 0.347872854030501,0.370035533066273,0.343817905240388,0.429063090196078,0.341793150326797,0.70683124
## 2000 0.527826405228758,0.619847947712418,0.588369045751634,0.405842760784314,0.415171534640523,0.41924195
## imgLumB.madLst
## 1995 0.389545882352941,0.232564705882353,0.273263529411765,0.250007058823529,0.424430588235294,0.13953882
## 1996 0.0232564705882353,0.372103529411765,0.290705882352941,0.343032941176471,0.215122352941177,0.0755835
## 1997 0.29652,0.284891764705882,0.209308235294118,0.168609411764706,0.220936470588235,0.430244705882353,0.
## 1998 0.244192941176471,0.279077647058823,0.401174117647059,0.19768,0.273263529411765,0.19768,0.1104682352
## 1999 0.244192941176471,0.220936470588235,0.215122352941176,0.19768,0.203494117647059,0.19768,0.1918658823
## 2000 0.360475294117647,0.168609411764706,0.232564705882353,0.383731764705882,0.302334117647059,0.29652,0.
## imgLumG.meanLst
## 1995 0.238140946405229,0.548952847058824,0.458902854901961,0.521525443137255,0.478348840876833,0.76862168
## 1996 0.0582805647058824,0.421809640960942,0.332203269726121,0.521383780392157,0.399365207843137,0.1100528
## 1997 0.316450486253483,0.328331503267974,0.082146739869281,0.133337662745098,0.283446337464752,0.30938891
## 1998 0.436215839460784,0.428843783896537,0.332023383006536,0.287895330112721,0.570361892810457,0.25169091
## 1999 0.336768244008715,0.382591130789233,0.316603084223013,0.418466405228758,0.27968,0.609746070588235,0.
## 2000 0.493383111111111,0.483684287581699,0.583914122875817,0.384178530718954,0.35006782745098,0.442342692
## imgLumG.madLst
## 1995 0.180237647058824,0.284891764705882,0.459315294117647,0.29652,0.447687058823529,0.0290705882352942,0
## 1996 0.0232564705882353,0.366289411764706,0.226750588235294,0.279077647058823,0.215122352941176,0.0406988
## 1997 0.244192941176471,0.203494117647059,0.0406988235294118,0.0813976470588235,0.220936470588235,0.250007
## 1998 0.273263529411765,0.313962352941176,0.284891764705882,0.156981176470588,0.401174117647059,0.14535294
## 1999 0.255821176470588,0.180237647058824,0.186051764705882,0.19768,0.151167058823529,0.366289411764706,0.
## 2000 0.453501176470588,0.180237647058824,0.255821176470588,0.39536,0.377917647058823,0.377917647058823,0.
## imgCorRBLst
## 1995 0.915882074086517,0.936125073250323,0.925342861360154,0.95381690962518,0.95075431812147,0.9465209374
## 1996 0.915707903872551,0.966378481720423,0.934346917016782,0.96268897194293,0.643998182328561,0.911981240
## 1997 0.909635067030262,0.930481667649553,0.964721579259928,0.96473135067568,0.822767973153162,0.681278098
## 1998 0.938789672798421,0.953786419752476,0.965743941783733,0.957314586604812,0.90900969691132,0.971692049
## 1999 0.976691955602698,0.991480687495777,0.994310999205075,0.958948621865575,0.928456060261588,0.97177173
## 2000 0.934016962866324,0.869249572573419,0.95419123083483,0.99211604226502,0.886322279467387,0.9658431967
## imgCorBGLst
## 1995 0.935348475311361,0.907746100548057,0.881740060814866,0.919159134499559,0.958278552627527,0.92539659
## 1996 0.930539390139604,0.927564419461944,0.702515027026444,0.924665362075104,0.988629836487898,0.95070288
## 1997 0.911882384892067,0.894523192293658,0.504867530958891,0.862265820603762,0.777429363133283,0.85725254
## 1998 0.928691798649667,0.951020630069392,0.919789826556429,0.960960553936278,0.947755142893997,0.88400489
## 1999 0.988345312678391,0.985803860751613,0.981507757311958,0.993538988044571,0.96900794678503,0.931726266
## 2000 0.970598355199557,0.968412763424045,0.951101496698407,0.972648315688969,0.953322354083827,0.97472582
## imgCorGRLst
## 1995 0.847874217677699,0.839388873374213,0.741342316812819,0.823314200012902,0.890242650029409,0.80773112
## 1996 0.905059755750857,0.873195428063052,0.541699306024517,0.901734854545225,0.555726920078307,0.78428335
## 1997 0.774839394682602,0.739965036963889,0.411054415648273,0.765620654385252,0.48388701657771,0.478801440
## 1998 0.772263704112005,0.845496084528121,0.894834599219499,0.902602492286398,0.907992257149128,0.90263960
## 1999 0.949730876675118,0.974034219896241,0.97380365733435,0.946487562452522,0.858092390669452,0.884698159
## 2000 0.85761142022944,0.795548182277695,0.85391291670082,0.954408046169872,0.75334315741689,0.90986349339
## imgCosSmlRBLst
## 1995 0.914731328492761,0.962765734792928,0.915702831960683,0.963918405906571,0.964719154616067,0.97396974
## 1996 0.932706561701828,0.956871939627114,0.876407401582204,0.979315401156653,0.822971526691895,0.85204887
## 1997 0.896376758374088,0.890811821581254,0.736503708305722,0.909504902197789,0.753645293034532,0.77522484
## 1998 0.948081917347424,0.935773975762662,0.966820967152252,0.97213224636034,0.982515417797099,0.969844639
## 1999 0.966107675518083,0.993603970725657,0.991132605154828,0.984185066024919,0.933808988345575,0.97391459
## 2000 0.93791291478598,0.950019937196597,0.974871349325966,0.983550123453508,0.889573084263207,0.972899840
## imgCosSmlBGLst
## 1995 0.96400751272461,0.979393035008292,0.953617606787962,0.982363771043141,0.986170023686445,0.988320992
## 1996 0.944922711965372,0.97461084102372,0.914352348852314,0.983891801393515,0.990462007297541,0.955198499
## 1997 0.965055223339705,0.94701329780031,0.768243680209807,0.944771974411103,0.886433833736298,0.924445197
## 1998 0.982186675708709,0.978837894392259,0.97444586396087,0.990353089548557,0.989599783631801,0.962642900
## 1999 0.99364492733916,0.994085710782682,0.99431263755616,0.998247435581869,0.985014310701524,0.9842854479
## 2000 0.98711053074941,0.990587173088527,0.992068737659828,0.990166614611923,0.971386599427241,0.991620350
## imgCosSmlGRLst
## 1995 0.914731328492761,0.962765734792928,0.915702831960683,0.963918405906571,0.964719154616067,0.97396974
## 1996 0.932706561701828,0.956871939627114,0.876407401582204,0.979315401156653,0.822971526691895,0.85204887
## 1997 0.896376758374088,0.890811821581254,0.736503708305722,0.909504902197789,0.753645293034532,0.77522484
## 1998 0.948081917347424,0.935773975762662,0.966820967152252,0.97213224636034,0.982515417797099,0.969844639
## 1999 0.966107675518083,0.993603970725657,0.991132605154828,0.984185066024919,0.933808988345575,0.97391459
## 2000 0.93791291478598,0.950019937196597,0.974871349325966,0.983550123453508,0.889573084263207,0.972899840
## 'data.frame': 2000 obs. of 18 variables:
## $ business_id : int 1000 1001 100 1006 1010 101 1011 1012 1014 1015 ...
## $ labels : chr "1 2 3 4 5 6 7" "0 1 6 8" "1 2 4 5 6 7" "1 2 4 5 6" ...
## $ nImgs : int 54 9 84 22 11 121 70 37 32 145 ...
## $ imgResXLst : chr "500,375,375,375,375,375,500,500,500,500,500,500,500,500,375,414,373,500,399,375,375,375,500,500,472,478,467,470,375,373,375,375"| __truncated__ "500,375,500,500,500,366,358,444,500" "500,375,375,375,375,500,375,375,500,375,373,375,375,500,375,500,500,500,500,375,375,375,375,375,375,375,375,373,373,375,375,375"| __truncated__ "500,373,281,500,500,500,500,500,500,500,500,396,500,500,500,281,281,375,375,375,375,375" ...
## $ imgResYLst : chr "500,500,500,500,500,500,332,332,332,332,332,375,375,375,500,500,500,389,500,500,500,500,375,375,500,500,500,499,500,500,500,500"| __truncated__ "375,500,375,361,375,500,500,479,373" "375,500,500,500,500,375,500,500,268,500,500,500,500,375,500,375,375,375,375,500,500,500,500,500,500,500,500,500,500,500,500,500"| __truncated__ "375,500,500,273,375,375,375,375,375,399,290,500,500,500,375,500,500,500,500,500,500,500" ...
## $ imgResXYLst : chr "250000,187500,187500,187500,187500,187500,166000,166000,166000,166000,166000,187500,187500,187500,187500,207000,186500,194500,1"| __truncated__ "187500,187500,187500,180500,187500,183000,179000,212676,186500" "187500,187500,187500,187500,187500,187500,187500,187500,134000,187500,186500,187500,187500,187500,187500,187500,187500,187500,1"| __truncated__ "187500,186500,140500,136500,187500,187500,187500,187500,187500,199500,145000,198000,250000,250000,187500,140500,140500,187500,1"| __truncated__ ...
## $ imgLumR.meanLst: chr "0.470262839215686,0.314501103267974,0.373570049673203,0.435050499346405,0.471514373856209,0.528303539869281,0.427085258681786,0"| __truncated__ "0.62211543006536,0.381729024836601,0.515106467973856,0.543710759871816,0.481847780392157,0.443394792671167,0.56832711140322,0.5"| __truncated__ "0.554903968627451,0.333421218300654,0.273489254901961,0.53425428496732,0.290379210457516,0.268196162091503,0.244151926797386,0."| __truncated__ "0.498694567320261,0.238366125216843,0.420795199218477,0.391512317747612,0.146110870588235,0.622516789542484,0.513440376470588,0"| __truncated__ ...
## $ imgLumR.madLst : chr "0.308148235294118,0.145352941176471,0.162795294117647,0.313962352941176,0.122096470588235,0.122096470588235,0.174423529411765,0"| __truncated__ "0.261635294117647,0.180237647058823,0.226750588235294,0.267449411764706,0.308148235294118,0.313962352941176,0.29652,0.250007058"| __truncated__ "0.0930258823529412,0.290705882352941,0.191865882352941,0.215122352941177,0.104654117647059,0.203494117647059,0.145352941176471,"| __truncated__ "0.348847058823529,0.0930258823529412,0.232564705882353,0.203494117647059,0.0348847058823529,0.133724705882353,0.203494117647059"| __truncated__ ...
## $ imgLumB.meanLst: chr "0.400086839215686,0.281435168627451,0.352887864052288,0.391357783006536,0.431559173856209,0.422152951633987,0.408650838648712,0"| __truncated__ "0.40706708496732,0.340096773856209,0.463498896732026,0.406464005214274,0.341465432679739,0.417969720347155,0.531626925183481,0."| __truncated__ "0.549200501960784,0.24384842875817,0.202027189542484,0.379692507189542,0.172298311111111,0.168613291503268,0.158317322875817,0."| __truncated__ "0.441220977777778,0.175660390054145,0.393309106133557,0.223112949795303,0.119968815686275,0.620769233986928,0.362592062745098,0"| __truncated__ ...
## $ imgLumB.madLst : chr "0.319776470588235,0.145352941176471,0.156981176470588,0.29652,0.151167058823529,0.139538823529412,0.180237647058824,0.209308235"| __truncated__ "0.313962352941176,0.156981176470588,0.302334117647059,0.151167058823529,0.186051764705882,0.267449411764706,0.290705882352941,0"| __truncated__ "0.104654117647059,0.19768,0.0872117647058823,0.267449411764706,0.104654117647059,0.145352941176471,0.133724705882353,0.12791058"| __truncated__ "0.29652,0.0639552941176471,0.209308235294118,0.0813976470588235,0.0232564705882353,0.122096470588235,0.203494117647059,0.337218"| __truncated__ ...
## $ imgLumG.meanLst: chr "0.380454243137255,0.246817840522876,0.314116852287582,0.363030546405229,0.427195963398693,0.38872991372549,0.40143399480274,0.3"| __truncated__ "0.279726452287582,0.317789532026144,0.421921568627451,0.433254011188963,0.252531011764706,0.382980027858138,0.520628305400372,0"| __truncated__ "0.503275879738562,0.168192564705882,0.156450175163399,0.279168690196078,0.113039288888889,0.108691116339869,0.10628339869281,0."| __truncated__ "0.296656836601307,0.13043553593019,0.354685172004745,0.170870473317532,0.0825161202614379,0.423250112418301,0.265035294117647,0"| __truncated__ ...
## $ imgLumG.madLst : chr "0.232564705882353,0.127910588235294,0.145352941176471,0.250007058823529,0.186051764705882,0.145352941176471,0.209308235294118,0"| __truncated__ "0.226750588235294,0.139538823529412,0.430244705882353,0.244192941176471,0.116282352941176,0.284891764705882,0.290705882352941,0"| __truncated__ "0.0930258823529412,0.104654117647059,0.0639552941176471,0.255821176470588,0.0872117647058823,0.0872117647058823,0.0930258823529"| __truncated__ "0.151167058823529,0.0406988235294118,0.186051764705882,0.0639552941176471,0.0232564705882353,0.110468235294118,0.22093647058823"| __truncated__ ...
## $ imgCorRBLst : chr "0.970296012735957,0.980985682842824,0.972093026685952,0.983724333941815,0.926027872315143,0.959969403012555,0.895286104308499,0"| __truncated__ "0.89794420182504,0.963494559422061,0.919773168736845,0.69235792837934,0.798992871092365,0.973668862524825,0.959230647422656,0.9"| __truncated__ "0.88980511332014,0.982156271657673,0.986750578890926,0.968016666545741,0.904980631748477,0.935874086787087,0.971535954392398,0."| __truncated__ "0.934801931155002,0.935308855763743,0.978877671329745,0.91944404028056,0.977686193239092,0.99273852306705,0.970659278925212,0.9"| __truncated__ ...
## $ imgCorBGLst : chr "0.927947240419192,0.981032496136578,0.96955277428845,0.955336224085597,0.915268062088783,0.98801384023825,0.945387428624615,0.9"| __truncated__ "0.927140641328733,0.970350055669286,0.920977235836244,0.867252069661227,0.902914133624919,0.982290738773016,0.966423726949923,0"| __truncated__ "0.95460533534165,0.96530200316397,0.941724560255717,0.974498937299788,0.955756380404291,0.967735272285826,0.97064625804634,0.97"| __truncated__ "0.87106377293215,0.962551893540078,0.98976645020501,0.987436732442999,0.965603192699164,0.892732787748495,0.954804684200538,0.9"| __truncated__ ...
## $ imgCorGRLst : chr "0.85219702103209,0.944813172951291,0.920251569196133,0.913370447315881,0.728953194085978,0.94179042725867,0.761124311894968,0.6"| __truncated__ "0.808981100884253,0.88550936318431,0.762637574605669,0.376703880334306,0.578244046412377,0.941295772588174,0.893728995868977,0."| __truncated__ "0.847621407193241,0.92694807051355,0.937592397476484,0.926798909689827,0.820502357384701,0.877048066628061,0.936668909619535,0."| __truncated__ "0.79162264641504,0.892536640871731,0.962771424520778,0.882261983517338,0.909919127928424,0.89294267746995,0.888529932852904,0.9"| __truncated__ ...
## $ imgCosSmlRBLst : chr "0.961120503189668,0.959983778123168,0.971567089324108,0.979125620481285,0.968697673098018,0.988542479997685,0.954435853766244,0"| __truncated__ "0.915274523521204,0.971958916855887,0.919835219182328,0.884919588642168,0.910722614004215,0.979423045009899,0.976136066531435,0"| __truncated__ "0.993414744197245,0.958187824846168,0.964877716984537,0.973537472575526,0.946477604939744,0.949202689790543,0.981041897682667,0"| __truncated__ "0.919247057411873,0.915336561758393,0.991620853519786,0.867311395735846,0.936240775424546,0.977451499648811,0.963174015441138,0"| __truncated__ ...
## $ imgCosSmlBGLst : chr "0.97522013067789,0.986144273136297,0.988225607138478,0.988870895735175,0.989438414537079,0.997178834207598,0.988303322936007,0."| __truncated__ "0.970619992118104,0.992501438726135,0.969513978027899,0.974530959738499,0.979722839640874,0.993661704964874,0.992244441917603,0"| __truncated__ "0.997592656674784,0.981651052987919,0.969675681233784,0.993058742872205,0.985807813173355,0.986553412603165,0.991061521854879,0"| __truncated__ "0.946268118128064,0.971513086772763,0.997785703330826,0.983580476140314,0.974096277906093,0.986053291857065,0.985642088436591,0"| __truncated__ ...
## $ imgCosSmlGRLst : chr "0.961120503189668,0.959983778123168,0.971567089324108,0.979125620481285,0.968697673098018,0.988542479997685,0.954435853766244,0"| __truncated__ "0.915274523521204,0.971958916855887,0.919835219182328,0.884919588642168,0.910722614004215,0.979423045009899,0.976136066531435,0"| __truncated__ "0.993414744197245,0.958187824846168,0.964877716984537,0.973537472575526,0.946477604939744,0.949202689790543,0.981041897682667,0"| __truncated__ "0.919247057411873,0.915336561758393,0.991620853519786,0.867311395735846,0.936240775424546,0.977451499648811,0.963174015441138,0"| __truncated__ ...
## - attr(*, "comment")= chr "glbObsTrn"
## NULL
## [1] "Reading file ./data/test_color.csv..."
## [1] "dimensions of data in ./data/test_color.csv: 10,000 rows x 17 cols"
## [1] " Truncating imgResXLst to first 100 chars..."
## [1] " Truncating imgResYLst to first 100 chars..."
## [1] " Truncating imgResXYLst to first 100 chars..."
## [1] " Truncating imgLumR.meanLst to first 100 chars..."
## [1] " Truncating imgLumR.madLst to first 100 chars..."
## [1] " Truncating imgLumB.meanLst to first 100 chars..."
## [1] " Truncating imgLumB.madLst to first 100 chars..."
## [1] " Truncating imgLumG.meanLst to first 100 chars..."
## [1] " Truncating imgLumG.madLst to first 100 chars..."
## [1] " Truncating imgCorRBLst to first 100 chars..."
## [1] " Truncating imgCorBGLst to first 100 chars..."
## [1] " Truncating imgCorGRLst to first 100 chars..."
## [1] " Truncating imgCosSmlRBLst to first 100 chars..."
## [1] " Truncating imgCosSmlBGLst to first 100 chars..."
## [1] " Truncating imgCosSmlGRLst to first 100 chars..."
## business_id nImgs
## 1 003sg 167
## 2 00er5 210
## 3 00kad 83
## 4 00mc6 15
## 5 00q7x 24
## 6 00v0t 24
## imgResXLst
## 1 375,500,375,375,500,375,500,500,500,500,500,281,500,500,500,373,375,500,375,373,375,500,500,375,500,
## 2 489,500,500,281,375,397,469,500,320,375,500,375,500,375,375,375,500,500,345,375,375,500,500,500,281,
## 3 332,500,500,375,281,375,500,500,500,375,500,500,375,375,375,500,500,500,500,375,500,500,375,500,500,
## 4 375,500,500,375,323,500,500,500,281,500,375,500,500,500,375
## 5 500,500,373,375,500,500,500,500,375,500,375,375,325,500,375,500,500,500,500,375,500,375,375,500
## 6 375,500,375,500,375,500,500,500,500,373,500,375,375,375,500,500,500,500,375,500,500,500,375,375
## imgResYLst
## 1 500,375,500,500,500,500,375,375,350,375,500,500,375,375,373,500,500,500,500,500,500,332,375,500,375,
## 2 500,500,375,500,500,500,314,375,480,500,375,500,375,500,500,500,373,375,500,500,500,375,380,282,500,
## 3 500,375,375,500,500,500,281,281,281,500,375,500,500,500,500,375,375,374,335,500,373,344,500,373,375,
## 4 500,500,375,500,500,375,418,375,500,375,500,375,500,333,500
## 5 375,500,500,500,375,375,372,333,500,375,500,500,500,375,500,375,375,375,375,500,373,500,500,500
## 6 500,375,500,375,500,375,375,375,281,500,375,500,500,500,500,375,375,375,500,500,375,282,500,500
## imgResXYLst
## 1 187500,187500,187500,187500,250000,187500,187500,187500,175000,187500,250000,140500,187500,187500,18
## 2 244500,250000,187500,140500,187500,198500,147266,187500,153600,187500,187500,187500,187500,187500,18
## 3 166000,187500,187500,187500,140500,187500,140500,140500,140500,187500,187500,250000,187500,187500,18
## 4 187500,250000,187500,187500,161500,187500,209000,187500,140500,187500,187500,187500,250000,166500,18
## 5 187500,250000,186500,187500,187500,187500,186000,166500,187500,187500,187500,187500,162500,187500,18
## 6 187500,187500,187500,187500,187500,187500,187500,187500,140500,186500,187500,187500,187500,187500,25
## imgLumR.meanLst
## 1 0.5181632,0.480446138562092,0.602665620915033,0.272232951633987,0.386362196078431,0.383198431372549,
## 2 0.410447443762781,0.585374384313725,0.478584616993464,0.705588863303328,0.434379524183007,0.39564699
## 3 0.631800519725963,0.599587911111111,0.443663518954248,0.653849307189542,0.339964915218757,0.66367995
## 4 0.788479016993464,0.162021160784314,0.491557061437909,0.428982149019608,0.738027535967948,0.27537972
## 5 0.575066896732026,0.649985552941176,0.394181086053724,0.600689568627451,0.656511623529412,0.38200991
## 6 0.632097756862745,0.843626248366013,0.580465254901961,0.483202488888889,0.283668517647059,0.42447523
## imgLumR.madLst
## 1 0.244192941176471,0.319776470588235,0.273263529411765,0.133724705882353,0.389545882352941,0.20349411
## 2 0.174423529411765,0.34303294117647,0.267449411764706,0.15116705882353,0.122096470588235,0.2848917647
## 3 0.19768,0.0581411764705882,0.0755835294117648,0.209308235294118,0.209308235294118,0.226750588235294,
## 4 0.09884,0.0348847058823529,0.372103529411765,0.284891764705882,0.261635294117647,0.139538823529412,0
## 5 0.0406988235294118,0.162795294117647,0.29652,0.255821176470588,0.145352941176471,0.244192941176471,0
## 6 0.424430588235294,0.104654117647059,0.215122352941176,0.116282352941177,0.162795294117647,0.33721882
## imgLumB.meanLst
## 1 0.482250269281046,0.454540779084967,0.521648376470588,0.217321830065359,0.21917342745098,0.275709071
## 2 0.348579542082682,0.428113380392157,0.368442980392157,0.565004144860791,0.437527759477124,0.30091966
## 3 0.308795913064021,0.511195440522876,0.384128062745098,0.461310933333333,0.296774042285954,0.36901076
## 4 0.447383299346405,0.132578854901961,0.380563430065359,0.386758546405229,0.730730917258544,0.23315135
## 5 0.450449087581699,0.563627011764706,0.375317100352205,0.408973281045752,0.458768899346405,0.31968140
## 6 0.593823267973856,0.78756714248366,0.536652130718954,0.270589218300654,0.260425620915033,0.352570833
## imgLumB.madLst
## 1 0.261635294117647,0.261635294117647,0.366289411764706,0.0639552941176471,0.156981176470588,0.1453529
## 2 0.104654117647059,0.343032941176471,0.261635294117647,0.255821176470588,0.180237647058824,0.25000705
## 3 0.250007058823529,0.0465129411764706,0.0755835294117647,0.220936470588235,0.19768,0.232564705882353,
## 4 0.122096470588235,0.0290705882352941,0.290705882352941,0.232564705882353,0.267449411764706,0.1279105
## 5 0.034884705882353,0.261635294117647,0.313962352941176,0.226750588235294,0.215122352941176,0.24419294
## 6 0.482571764705882,0.156981176470588,0.250007058823529,0.215122352941176,0.191865882352941,0.30233411
## imgLumG.meanLst
## 1 0.430285009150327,0.275434583006536,0.462405437908497,0.101890865359477,0.249514917647059,0.24060841
## 2 0.309533084726733,0.330929819607843,0.301213992156863,0.449378549996511,0.461475576470588,0.27994800
## 3 0.11533453815261,0.356775152941176,0.305095362091503,0.296019283660131,0.245902170120717,0.158195952
## 4 0.51544725751634,0.116746415686275,0.310151654901961,0.400247383006536,0.693268475687489,0.207954070
## 5 0.308603273202614,0.4115576,0.338701803080482,0.23277642875817,0.334515074509804,0.281685124183007,0
## 6 0.530171126797386,0.734699482352941,0.469050729411765,0.251249275816993,0.270162509803922,0.30208974
## imgLumG.madLst
## 1 0.290705882352941,0.215122352941176,0.313962352941176,0.0290705882352941,0.0930258823529412,0.116282
## 2 0.0813976470588235,0.220936470588235,0.180237647058824,0.29652,0.220936470588235,0.220936470588235,0
## 3 0.0406988235294118,0.0755835294117648,0.09884,0.220936470588235,0.151167058823529,0.116282352941176,
## 4 0.127910588235294,0.0290705882352941,0.226750588235294,0.191865882352941,0.255821176470588,0.1162823
## 5 0.0465129411764706,0.343032941176471,0.290705882352941,0.116282352941176,0.209308235294118,0.2034941
## 6 0.465129411764706,0.186051764705882,0.331404705882353,0.151167058823529,0.174423529411765,0.25582117
## imgCorRBLst
## 1 0.971848384633669,0.844142924409889,0.933626874258348,0.969810338166802,0.781499439147351,0.96029374
## 2 0.977826640295478,0.970093800087844,0.958117848866376,0.826425463623886,0.902990540155383,0.95466981
## 3 0.751064433278075,0.855668373119896,0.972904554828568,0.939937456381645,0.926619297264817,0.92939373
## 4 0.555820014464522,0.987192718776747,0.959839340799832,0.959920131196621,0.995094722698392,0.95121547
## 5 0.992609192367127,0.952903266162272,0.924606994210252,0.919888900332278,0.927916110443575,0.96593965
## 6 0.979131337629565,0.929946607878104,0.975062216519485,0.817334337377692,0.983240871684203,0.93620417
## imgCorBGLst
## 1 0.966759517902915,0.838294605196521,0.926032937557564,0.690245987299617,0.927094333403652,0.98147762
## 2 0.979675281758249,0.931181159776226,0.976943530052098,0.922761873692084,0.979859433730912,0.97862132
## 3 0.800172977668431,0.940824315292693,0.983183564231686,0.966469004416202,0.948687307759127,0.91660611
## 4 0.963092912451779,0.986737806214834,0.987758595775795,0.944562407566799,0.992731818832544,0.96687482
## 5 0.988859901326361,0.921854759744224,0.906671013862309,0.893888835750382,0.962727047104801,0.95945811
## 6 0.98168053884346,0.949068700955111,0.94405958053516,0.97243791807566,0.990013165258653,0.95095772939
## imgCorGRLst
## 1 0.908233427536138,0.662158186456318,0.790033846396902,0.661825133591838,0.706251827780452,0.92733706
## 2 0.939796742137535,0.853767975534679,0.926302731451899,0.727291203433466,0.831679683293415,0.91766632
## 3 0.441646436230003,0.683247381338638,0.951309332579077,0.90104916738589,0.790516505951237,0.775943981
## 4 0.652874422660011,0.960026571284438,0.957548749840944,0.893443330148534,0.97897232994291,0.905843186
## 5 0.974959836789256,0.787664551846631,0.768649320768159,0.697549296731466,0.855147576532709,0.86322975
## 6 0.944733732055498,0.787750679519903,0.87036877862435,0.826045468306032,0.97253553542978,0.8094007741
## imgCosSmlRBLst
## 1 0.979683847698728,0.842423493063493,0.927650747647504,0.766240959072844,0.889905947268674,0.97129720
## 2 0.981250004820448,0.960622449715253,0.964180727999246,0.910476761778145,0.967889643398825,0.96201650
## 3 0.588207431002124,0.957670991272398,0.99119061669802,0.947822259934586,0.871121143446002,0.937656042
## 4 0.97594530245552,0.975293999039549,0.969604590075432,0.973708147249703,0.998046338257088,0.968532835
## 5 0.993975937397629,0.921482797227794,0.925027232083674,0.858536192757857,0.942189327212559,0.94474994
## 6 0.976047920837209,0.971843848743254,0.967996402696946,0.950637278026409,0.993369948797711,0.93567475
## imgCosSmlBGLst
## 1 0.99233923888975,0.896735079767823,0.973423361287955,0.77963268894566,0.935925242505431,0.9924672161
## 2 0.989000892129007,0.981571328444779,0.990248013460088,0.970851734862672,0.995231073626733,0.99079674
## 3 0.815204820449962,0.987659243028195,0.997049498593563,0.987396333282936,0.959653443396857,0.97569520
## 4 0.996816305782785,0.989285962130579,0.991925036311123,0.985214131962629,0.999186777449118,0.98871761
## 5 0.997648933230918,0.962598680540705,0.968930559805157,0.935459683780001,0.981940079791729,0.98287981
## 6 0.992549973331107,0.991560903815897,0.985215261388636,0.985884075333517,0.994081434894028,0.98264613
## imgCosSmlGRLst
## 1 0.979683847698728,0.842423493063493,0.927650747647504,0.766240959072844,0.889905947268674,0.97129720
## 2 0.981250004820448,0.960622449715253,0.964180727999246,0.910476761778145,0.967889643398825,0.96201650
## 3 0.588207431002124,0.957670991272398,0.99119061669802,0.947822259934586,0.871121143446002,0.937656042
## 4 0.97594530245552,0.975293999039549,0.969604590075432,0.973708147249703,0.998046338257088,0.968532835
## 5 0.993975937397629,0.921482797227794,0.925027232083674,0.858536192757857,0.942189327212559,0.94474994
## 6 0.976047920837209,0.971843848743254,0.967996402696946,0.950637278026409,0.993369948797711,0.93567475
## business_id nImgs
## 12 01mrb 62
## 1789 6ey8p 40
## 3881 dqqme 117
## 3912 dv9lg 15
## 4024 ebyno 128
## 4625 gkb3z 44
## imgResXLst
## 12 500,500,500,500,500,375,500,500,373,500,500,500,375,375,375,333,500,375,375,500,500,375,332,500,464,
## 1789 500,373,375,500,500,375,500,500,375,500,375,500,360,500,500,500,375,375,500,500,500,500,500,350,500,
## 3881 500,500,375,500,500,375,500,375,375,500,375,500,500,281,375,375,376,500,500,375,375,500,500,281,375,
## 3912 281,500,500,281,375,500,500,362,500,500,375,500,500,281,500
## 4024 375,375,375,500,372,375,500,500,373,500,375,375,500,500,373,282,375,500,500,281,375,375,500,500,375,
## 4625 375,500,500,500,375,500,500,500,500,500,500,500,375,500,500,500,373,375,375,375,375,500,500,375,500,
## imgResYLst
## 12 375,333,375,375,375,500,375,375,500,375,375,334,500,500,500,500,375,500,500,500,375,500,500,375,368,
## 1789 375,500,500,333,500,500,373,500,500,333,500,375,450,375,375,375,500,500,373,375,374,375,375,263,373,
## 3881 334,433,500,375,375,500,375,500,500,500,500,299,375,500,500,500,500,375,375,500,500,375,500,500,500,
## 3912 500,375,375,500,500,375,373,500,375,375,500,375,375,500,375
## 4024 500,500,500,375,500,500,281,500,500,375,500,500,500,319,500,500,500,375,375,500,500,500,375,375,500,
## 4625 500,375,375,375,500,375,375,375,373,469,373,373,500,442,413,373,500,500,500,500,500,375,375,500,375,
## imgResXYLst
## 12 187500,166500,187500,187500,187500,187500,187500,187500,186500,187500,187500,167000,187500,187500,18
## 1789 187500,186500,187500,166500,250000,187500,186500,250000,187500,166500,187500,187500,162000,187500,18
## 3881 167000,216500,187500,187500,187500,187500,187500,187500,187500,250000,187500,149500,187500,140500,18
## 3912 140500,187500,187500,140500,187500,187500,186500,181000,187500,187500,187500,187500,187500,140500,18
## 4024 187500,187500,187500,187500,186000,187500,140500,250000,186500,187500,187500,187500,250000,159500,18
## 4625 187500,187500,187500,187500,187500,187500,187500,187500,186500,234500,186500,186500,187500,221000,20
## imgLumR.meanLst
## 12 0.321910797385621,0.3854524877819,0.639106990849673,0.522030180392157,0.520816501960784,0.4206551215
## 1789 0.521053908496732,0.186368858749934,0.46149611503268,0.722819313431078,0.605935984313725,0.454702745
## 3881 0.427333450745568,0.522886020921071,0.446034070588235,0.320632763398693,0.471437678431373,0.1927456,
## 3912 0.482311576303119,0.374859586928105,0.515909270588235,0.500595185262717,0.299820758169935,0.45293634
## 4024 0.801305349019608,0.244255309803922,0.217875220915033,0.315027304575163,0.436918131983976,0.37739254
## 4625 0.440409725490196,0.620761558169935,0.836215215686275,0.813851252287582,0.539769328104575,0.81810752
## imgLumR.madLst
## 12 0.255821176470588,0.215122352941176,0.238378823529412,0.401174117647059,0.529084705882353,0.16279529
## 1789 0.122096470588235,0.122096470588235,0.517456470588235,0.104654117647059,0.232564705882353,0.32559058
## 3881 0.186051764705882,0.39536,0.255821176470588,0.174423529411765,0.343032941176471,0.0348847058823529,0
## 3912 0.151167058823529,0.19768,0.372103529411765,0.279077647058823,0.250007058823529,0.290705882352941,0.
## 4024 0.139538823529412,0.145352941176471,0.191865882352941,0.145352941176471,0.366289411764706,0.40698823
## 4625 0.19768,0.110468235294118,0.063955294117647,0.0523270588235294,0.203494117647059,0.0639552941176472,
## imgLumB.meanLst
## 12 0.229827764705882,0.325976423482306,0.599767362091503,0.352829866666667,0.461769683660131,0.28045753
## 1789 0.489039163398693,0.121089291909793,0.388714833986928,0.589186362833422,0.592677098039216,0.37544698
## 3881 0.31938731947869,0.339586342435358,0.310304752941176,0.218079623529412,0.452070190849673,0.190028883
## 3912 0.331832084292792,0.355600125490196,0.457275984313726,0.387275444839858,0.214325960784314,0.42950199
## 4024 0.669136732026144,0.248044611764706,0.134653845751634,0.282943602614379,0.31519394897744,0.233203722
## 4625 0.365748517647059,0.528660873202614,0.751302253594771,0.738910912418301,0.432722049673203,0.79498329
## imgLumB.madLst
## 12 0.174423529411765,0.174423529411765,0.279077647058823,0.319776470588235,0.500014117647059,0.20930823
## 1789 0.186051764705882,0.0581411764705882,0.436058823529412,0.156981176470588,0.215122352941177,0.3139623
## 3881 0.226750588235294,0.331404705882353,0.267449411764706,0.110468235294118,0.331404705882353,0.03488470
## 3912 0.156981176470588,0.238378823529412,0.311055294117647,0.267449411764706,0.162795294117647,0.34884705
## 4024 0.186051764705882,0.133724705882353,0.0930258823529412,0.122096470588235,0.261635294117647,0.19768,0
## 4625 0.348847058823529,0.319776470588235,0.104654117647059,0.0813976470588236,0.284891764705882,0.0697694
## imgLumG.meanLst
## 12 0.153664230065359,0.260634893717247,0.579493521568627,0.196306175163399,0.379675356862745,0.21094238
## 1789 0.394977568627451,0.0906502864952952,0.208521934640523,0.452220526408762,0.547508549019608,0.3153831
## 3881 0.208816132441,0.241515029660825,0.272262316339869,0.181486159477124,0.426628935947712,0.17523013856
## 3912 0.262966548042705,0.318771346405229,0.440966818300654,0.348672918847254,0.171186530718954,0.36173059
## 4024 0.583177537254902,0.188824407843137,0.0901031947712418,0.258914070588235,0.232370040059034,0.0982101
## 4625 0.497264062745098,0.455144407843137,0.651355712418301,0.587818269281046,0.336371304575163,0.66650313
## imgLumG.madLst
## 12 0.127910588235294,0.139538823529412,0.261635294117647,0.145352941176471,0.418616470588235,0.18605176
## 1789 0.279077647058824,0.0465129411764706,0.186051764705882,0.29652,0.232564705882353,0.232564705882353,0
## 3881 0.162795294117647,0.168609411764706,0.226750588235294,0.0697694117647059,0.244192941176471,0.0406988
## 3912 0.156981176470588,0.203494117647059,0.215122352941176,0.255821176470588,0.110468235294118,0.26744941
## 4024 0.174423529411765,0.09884,0.0465129411764706,0.139538823529412,0.110468235294118,0.0465129411764706,
## 4625 0.267449411764706,0.558155294117647,0.110468235294118,0.0872117647058824,0.232564705882353,0.1337247
## imgCorRBLst
## 12 0.935705112469475,0.96333952042565,0.986519487319998,0.949356065322578,0.962896240410316,0.917127253
## 1789 0.931029757342648,0.882009337422199,0.969754096649183,0.919773371710625,0.945424518090047,0.93772965
## 3881 0.863438369677723,0.972152781116247,0.913297340570321,0.981749128615078,0.990731119575413,0.98873155
## 3912 0.772408627846024,0.974663241118569,0.976576433964581,0.85440465005897,0.954156392372715,0.988580901
## 4024 0.952220550698071,0.96823823211576,0.9377694743709,0.979853258508986,0.883971628677851,0.94677410021
## 4625 0.703018243311758,0.888144775110649,0.78247391116938,0.714748791367109,0.949079384036139,0.927552384
## imgCorBGLst
## 12 0.950849688450872,0.967944521839936,0.996004882142471,0.826090055040413,0.958735251278242,0.96460794
## 1789 0.953739347719723,0.962638716746981,0.826851265054117,0.919320522493232,0.943677075633523,0.98902148
## 3881 0.972024024017077,0.984670141066203,0.972924063910025,0.974692412433626,0.949537284883593,0.99216928
## 3912 0.985814631427115,0.9788337994384,0.986258334116213,0.970395137794206,0.990837140423891,0.9396765810
## 4024 0.977741238123994,0.939148799815802,0.870795674595713,0.94776855361584,0.859615038397928,0.825546862
## 4625 0.929313870859003,0.938321090081476,0.906058483665499,0.825122235752908,0.953924029501792,0.81942113
## imgCorGRLst
## 12 0.887860715252575,0.896885552166663,0.971905106979938,0.778650643214159,0.862408304132884,0.87354641
## 1789 0.863302565379926,0.814095384917805,0.735510174833602,0.758893484705637,0.825079596112513,0.89155453
## 3881 0.854846101407409,0.944880255465187,0.87425162411118,0.952558910252983,0.925293533560788,0.972109575
## 3912 0.760050991265081,0.933376334325643,0.951225875398876,0.791880595942377,0.932107054376074,0.90676966
## 4024 0.893414371711449,0.929515488631303,0.759183414741214,0.894117829895208,0.674708206504294,0.65512591
## 4625 0.555507987323199,0.721506292732668,0.538455874469056,0.32285285649768,0.851882519646554,0.718674717
## imgCosSmlRBLst
## 12 0.945561139674379,0.957615171197317,0.985960435669806,0.918651214817226,0.931635591785116,0.96281400
## 1789 0.961466021060852,0.852898388573001,0.875974358745021,0.947083568291598,0.970991462096974,0.94684614
## 3881 0.937284109748799,0.941739439995467,0.960005456195244,0.970236922186599,0.976511211659141,0.98148153
## 3912 0.887709277213202,0.983486156282812,0.982243678659893,0.917138870628929,0.95308643327147,0.974441117
## 4024 0.974380008011365,0.96507307102828,0.902430675739916,0.969492895995546,0.886523860347954,0.806205113
## 4625 0.896160645939034,0.891970956876756,0.909056094697014,0.861934706554335,0.961339045789571,0.94814525
## imgCosSmlBGLst
## 12 0.975650322646374,0.985327874363082,0.997522976878825,0.929872541286589,0.975969274062516,0.98965745
## 1789 0.979255126946868,0.969065577082543,0.915400900547962,0.98056844804467,0.990719179994378,0.993380040
## 3881 0.985962268026845,0.990300028576612,0.991178011645077,0.987379556556074,0.983967332323931,0.99473870
## 3912 0.982928331668051,0.994686201645319,0.995567579247454,0.987283339191241,0.993672833695452,0.98364498
## 4024 0.994815666487928,0.968311174127614,0.938507105570118,0.985228112890257,0.949898869463931,0.89449996
## 4625 0.970854512172922,0.970178037823765,0.96846420197426,0.936555550972648,0.988549644568245,0.961908571
## imgCosSmlGRLst
## 12 0.945561139674379,0.957615171197317,0.985960435669806,0.918651214817226,0.931635591785116,0.96281400
## 1789 0.961466021060852,0.852898388573001,0.875974358745021,0.947083568291598,0.970991462096974,0.94684614
## 3881 0.937284109748799,0.941739439995467,0.960005456195244,0.970236922186599,0.976511211659141,0.98148153
## 3912 0.887709277213202,0.983486156282812,0.982243678659893,0.917138870628929,0.95308643327147,0.974441117
## 4024 0.974380008011365,0.96507307102828,0.902430675739916,0.969492895995546,0.886523860347954,0.806205113
## 4625 0.896160645939034,0.891970956876756,0.909056094697014,0.861934706554335,0.961339045789571,0.94814525
## business_id nImgs
## 9995 zyrif 89
## 9996 zyvg6 16
## 9997 zyvjj 27
## 9998 zz8g4 118
## 9999 zzxkg 154
## 10000 zzxwm 13
## imgResXLst
## 9995 375,500,375,500,500,500,375,375,500,500,375,375,500,375,375,500,500,281,281,500,500,375,375,500,500,
## 9996 500,500,375,373,500,500,500,375,500,375,500,375,280,375,500,375
## 9997 500,375,500,500,500,500,500,402,500,373,500,375,500,500,500,500,375,500,500,375,500,375,500,500,281,
## 9998 375,500,375,500,375,375,375,500,500,375,500,500,500,500,500,499,500,500,500,375,282,500,500,500,375,
## 9999 500,500,500,500,375,500,500,500,375,375,375,299,500,500,375,500,500,375,500,500,500,373,500,281,500,
## 10000 500,373,500,281,500,375,333,375,375,218,500,500,500
## imgResYLst
## 9995 500,375,500,375,375,375,500,500,500,281,500,500,375,500,500,375,375,500,500,373,375,500,500,500,373,
## 9996 375,375,500,500,375,500,375,500,375,500,375,500,500,500,375,500
## 9997 373,500,375,375,406,373,373,315,373,500,375,500,281,373,375,375,500,280,373,500,375,500,375,375,500,
## 9998 500,375,500,232,500,500,500,375,375,500,375,375,375,373,500,323,334,373,375,500,500,335,375,280,500,
## 9999 375,375,375,375,500,373,375,375,500,500,500,500,375,500,500,375,371,500,281,375,375,500,375,500,375,
## 10000 281,500,299,500,374,500,500,500,500,211,375,375,281
## imgResXYLst
## 9995 187500,187500,187500,187500,187500,187500,187500,187500,250000,140500,187500,187500,187500,187500,18
## 9996 187500,187500,187500,186500,187500,250000,187500,187500,187500,187500,187500,187500,140000,187500,18
## 9997 186500,187500,187500,187500,203000,186500,186500,126630,186500,186500,187500,187500,140500,186500,18
## 9998 187500,187500,187500,116000,187500,187500,187500,187500,187500,187500,187500,187500,187500,186500,25
## 9999 187500,187500,187500,187500,187500,186500,187500,187500,187500,187500,187500,149500,187500,250000,18
## 10000 140500,186500,149500,140500,187000,187500,166500,187500,187500,45998,187500,187500,140500
## imgLumR.meanLst
## 9995 0.481607843137255,0.638274321568627,0.494432146405229,0.534562300653595,0.684518755555556,0.53623334
## 9996 0.700273296732026,0.442690907189543,0.484047414379085,0.483600063081533,0.409927989542484,0.69451080
## 9997 0.503836093150397,0.490213019607843,0.429142316339869,0.693477981699346,0.541181860330339,0.41179832
## 9998 0.515775037908497,0.523231790849673,0.416210112418301,0.473331203515889,0.503343728104575,0.22677389
## 9999 0.537484360784314,0.544833066666667,0.34471882875817,0.579165532026144,0.403394509803922,0.708402859
## 10000 0.376004828693043,0.266584997108763,0.463301829628172,0.30734823808527,0.673750445632799,0.745096052
## imgLumR.madLst
## 9995 0.244192941176471,0.29652,0.255821176470588,0.273263529411765,0.255821176470588,0.186051764705882,0.
## 9996 0.0872117647058824,0.366289411764706,0.122096470588235,0.180237647058824,0.284891764705882,0.2848917
## 9997 0.377917647058823,0.424430588235294,0.372103529411765,0.238378823529412,0.604668235294118,0.30814823
## 9998 0.104654117647059,0.354661176470588,0.313962352941176,0.360475294117647,0.325590588235294,0.14535294
## 9999 0.267449411764706,0.290705882352941,0.255821176470588,0.29652,0.302334117647059,0.191865882352941,0.
## 10000 0.255821176470588,0.116282352941176,0.244192941176471,0.151167058823529,0.267449411764706,0.05232705
## imgLumB.meanLst
## 9995 0.412798975163399,0.567996988235294,0.418845783006536,0.466652569934641,0.59051777254902,0.438435304
## 9996 0.633824585620915,0.399669145098039,0.324285239215686,0.458190064658571,0.34547722875817,0.577978745
## 9997 0.459675739893813,0.457637437908497,0.401228674509804,0.530668423529412,0.409261817830581,0.25960559
## 9998 0.513344020915033,0.475504962091503,0.336782996078431,0.378356118999324,0.379243043137255,0.14995080
## 9999 0.447615518954248,0.461685584313725,0.289852569934641,0.523310933333333,0.287040167320261,0.66990073
## 10000 0.392093838531854,0.270985943331756,0.327946199750803,0.223512916056102,0.384306050120583,0.71513627
## imgLumB.madLst
## 9995 0.250007058823529,0.348847058823529,0.244192941176471,0.360475294117647,0.325590588235294,0.23256470
## 9996 0.0930258823529412,0.337218823529412,0.110468235294118,0.19768,0.215122352941176,0.372103529411765,0
## 9997 0.39536,0.39536,0.383731764705882,0.366289411764706,0.389545882352941,0.209308235294118,0.1569811764
## 9998 0.0872117647058824,0.377917647058824,0.255821176470588,0.354661176470588,0.39536,0.09884,0.343032941
## 9999 0.319776470588235,0.290705882352941,0.220936470588235,0.436058823529412,0.180237647058824,0.22093647
## 10000 0.290705882352941,0.116282352941176,0.279077647058823,0.09884,0.29652,0.0813976470588236,0.279077647
## imgLumG.meanLst
## 9995 0.372869166013072,0.443214305882353,0.377411890196078,0.353245071895425,0.486126473202614,0.38937510
## 9996 0.589913035294118,0.388701532026144,0.222732590849673,0.4487209588393,0.297075681045752,0.4854206431
## 9997 0.399999158912895,0.452639288888889,0.326987754248366,0.365680209150327,0.277237631604366,0.28739984
## 9998 0.501077793464052,0.394963325490196,0.286057035294118,0.320560885733604,0.305726305882353,0.06532408
## 9999 0.399182954248366,0.389918305882353,0.241603262745098,0.493932193464052,0.201010509803922,0.56567965
## 10000 0.396649501081571,0.24355950165589,0.303738658272674,0.192735691856814,0.24436256684492,0.6852845803
## imgLumG.madLst
## 9995 0.19768,0.383731764705882,0.220936470588235,0.308148235294118,0.424430588235294,0.226750588235294,0.
## 9996 0.09884,0.319776470588235,0.0697694117647059,0.232564705882353,0.186051764705882,0.383731764705882,0
## 9997 0.430244705882353,0.366289411764706,0.360475294117647,0.290705882352941,0.0930258823529412,0.2441929
## 9998 0.0755835294117648,0.389545882352941,0.226750588235294,0.279077647058823,0.255821176470588,0.0406988
## 9999 0.29652,0.348847058823529,0.168609411764706,0.563969411764706,0.104654117647059,0.337218823529412,0.
## 10000 0.331404705882353,0.122096470588235,0.284891764705882,0.0697694117647059,0.151167058823529,0.1104682
## imgCorRBLst
## 9995 0.986668907300573,0.95567672787087,0.961486769497909,0.967940489055698,0.889047463715924,0.962206824
## 9996 0.984775405651682,0.941243597031432,0.962957406036832,0.812849353829791,0.944231613728544,0.95172777
## 9997 0.943362807368658,0.97654070979697,0.983441881798081,0.926525809520263,0.958488783816911,0.834062340
## 9998 0.887463872498118,0.965466471114394,0.91817608317058,0.936975799159143,0.968374716604802,0.972165170
## 9999 0.94385450983331,0.933186571251513,0.928828073947525,0.949761349823606,0.909153824649407,0.910487112
## 10000 0.916887324933198,0.994206192438183,0.897338484780619,0.915190916547783,0.760384847829794,0.95213779
## imgCorBGLst
## 9995 0.978889905836607,0.948577374602602,0.962413425516545,0.906943495075174,0.929844145415956,0.97337533
## 9996 0.994763524049693,0.991826237503051,0.949684021426896,0.908728242874223,0.961461864629689,0.96253996
## 9997 0.946248365824316,0.983170571296997,0.931675828156337,0.929243014274811,0.968287862604719,0.86309962
## 9998 0.977003606846291,0.976685803166609,0.974790783224431,0.957348225240581,0.980481408636699,0.94691895
## 9999 0.979902415188118,0.82027431804104,0.931203716738304,0.976124424958464,0.776360264671529,0.930297661
## 10000 0.851002004065099,0.992829373378876,0.888419683835157,0.970579408284495,0.90401977403274,0.918073714
## imgCorGRLst
## 9995 0.944294988670002,0.889867938562532,0.903858589846466,0.798908731579298,0.797750291967973,0.92513196
## 9996 0.97370767648882,0.933974844510638,0.89315036658354,0.579356911393526,0.850030004495771,0.8629239370
## 9997 0.816527100119178,0.940024435335754,0.905868899677257,0.776526400500627,0.877959179071194,0.81962853
## 9998 0.798368746804096,0.939097463678511,0.899111117113108,0.846680486259747,0.916483986445963,0.89653094
## 9999 0.925483260731398,0.650949654974656,0.873348554304762,0.881010052000508,0.562978393404076,0.79492813
## 10000 0.838558888181431,0.984582220173422,0.796152742217831,0.822078396090011,0.612440033735433,0.82590052
## imgCosSmlRBLst
## 9995 0.981425607721263,0.948567725329605,0.978342024359633,0.92557022510278,0.945193775908215,0.971167641
## 9996 0.995468831824797,0.977786003836395,0.984929521693662,0.924882279136455,0.930908056847003,0.95114398
## 9997 0.93607959294032,0.979252672405356,0.962693458255754,0.905131096029675,0.92581084461737,0.9076214145
## 9998 0.989161917597553,0.968059415443184,0.9542755479087,0.915243619004954,0.963711470163835,0.9363169524
## 9999 0.967769917844093,0.906091912101598,0.942617604897767,0.949034179018943,0.779826431061751,0.94009924
## 10000 0.948732480516347,0.994087161472735,0.931479299537262,0.86864885085648,0.773989615392037,0.981931346
## imgCosSmlBGLst
## 9995 0.994163458638827,0.972659514518748,0.991462164257466,0.962566170323933,0.975384878900411,0.99303093
## 9996 0.999215815945766,0.997240531820938,0.993925761925935,0.980044176110403,0.980975529946331,0.98467541
## 9997 0.979369095747662,0.994012548264405,0.972506313594491,0.964923206227788,0.98083594109022,0.926123792
## 9998 0.999029050848786,0.985506856149071,0.987596225548368,0.976000814414543,0.9918570061853,0.9658762470
## 9999 0.99259165416909,0.948584625270244,0.969169125550141,0.987784027684158,0.881949268603677,0.970074004
## 10000 0.95161952331471,0.997242198074098,0.961157001930771,0.976509065465343,0.933153788860195,0.991007641
## imgCosSmlGRLst
## 9995 0.981425607721263,0.948567725329605,0.978342024359633,0.92557022510278,0.945193775908215,0.971167641
## 9996 0.995468831824797,0.977786003836395,0.984929521693662,0.924882279136455,0.930908056847003,0.95114398
## 9997 0.93607959294032,0.979252672405356,0.962693458255754,0.905131096029675,0.92581084461737,0.9076214145
## 9998 0.989161917597553,0.968059415443184,0.9542755479087,0.915243619004954,0.963711470163835,0.9363169524
## 9999 0.967769917844093,0.906091912101598,0.942617604897767,0.949034179018943,0.779826431061751,0.94009924
## 10000 0.948732480516347,0.994087161472735,0.931479299537262,0.86864885085648,0.773989615392037,0.981931346
## 'data.frame': 10000 obs. of 17 variables:
## $ business_id : chr "003sg" "00er5" "00kad" "00mc6" ...
## $ nImgs : int 167 210 83 15 24 24 40 10 49 10 ...
## $ imgResXLst : chr "375,500,375,375,500,375,500,500,500,500,500,281,500,500,500,373,375,500,375,373,375,500,500,375,500,500,375,279,500,375,500,500"| __truncated__ "489,500,500,281,375,397,469,500,320,375,500,375,500,375,375,375,500,500,345,375,375,500,500,500,281,373,500,375,500,375,500,375"| __truncated__ "332,500,500,375,281,375,500,500,500,375,500,500,375,375,375,500,500,500,500,375,500,500,375,500,500,500,500,432,281,373,500,297"| __truncated__ "375,500,500,375,323,500,500,500,281,500,375,500,500,500,375" ...
## $ imgResYLst : chr "500,375,500,500,500,500,375,375,350,375,500,500,375,375,373,500,500,500,500,500,500,332,375,500,375,375,500,500,375,500,375,500"| __truncated__ "500,500,375,500,500,500,314,375,480,500,375,500,375,500,500,500,373,375,500,500,500,375,380,282,500,500,281,500,361,500,375,500"| __truncated__ "500,375,375,500,500,500,281,281,281,500,375,500,500,500,500,375,375,374,335,500,373,344,500,373,375,375,333,500,500,500,299,500"| __truncated__ "500,500,375,500,500,375,418,375,500,375,500,375,500,333,500" ...
## $ imgResXYLst : chr "187500,187500,187500,187500,250000,187500,187500,187500,175000,187500,250000,140500,187500,187500,186500,186500,187500,250000,1"| __truncated__ "244500,250000,187500,140500,187500,198500,147266,187500,153600,187500,187500,187500,187500,187500,187500,187500,186500,187500,1"| __truncated__ "166000,187500,187500,187500,140500,187500,140500,140500,140500,187500,187500,250000,187500,187500,187500,187500,187500,187000,1"| __truncated__ "187500,250000,187500,187500,161500,187500,209000,187500,140500,187500,187500,187500,250000,166500,187500" ...
## $ imgLumR.meanLst: chr "0.5181632,0.480446138562092,0.602665620915033,0.272232951633987,0.386362196078431,0.383198431372549,0.684618290196078,0.5337991"| __truncated__ "0.410447443762781,0.585374384313725,0.478584616993464,0.705588863303328,0.434379524183007,0.39564699955549,0.444836461060325,0."| __truncated__ "0.631800519725963,0.599587911111111,0.443663518954248,0.653849307189542,0.339964915218757,0.663679958169935,0.453291438141093,0"| __truncated__ "0.788479016993464,0.162021160784314,0.491557061437909,0.428982149019608,0.738027535967948,0.275379722875817,0.533679388310348,0"| __truncated__ ...
## $ imgLumR.madLst : chr "0.244192941176471,0.319776470588235,0.273263529411765,0.133724705882353,0.389545882352941,0.203494117647059,0.139538823529412,0"| __truncated__ "0.174423529411765,0.34303294117647,0.267449411764706,0.15116705882353,0.122096470588235,0.284891764705882,0.29652,0.11628235294"| __truncated__ "0.19768,0.0581411764705882,0.0755835294117648,0.209308235294118,0.209308235294118,0.226750588235294,0.331404705882353,0.2383788"| __truncated__ "0.09884,0.0348847058823529,0.372103529411765,0.284891764705882,0.261635294117647,0.139538823529412,0.523270588235294,0.12791058"| __truncated__ ...
## $ imgLumB.meanLst: chr "0.482250269281046,0.454540779084967,0.521648376470588,0.217321830065359,0.21917342745098,0.275709071895425,0.655838640522876,0."| __truncated__ "0.348579542082682,0.428113380392157,0.368442980392157,0.565004144860791,0.437527759477124,0.300919662172174,0.468127861468763,0"| __truncated__ "0.308795913064021,0.511195440522876,0.384128062745098,0.461310933333333,0.296774042285954,0.369010760784314,0.376557393063987,0"| __truncated__ "0.447383299346405,0.132578854901961,0.380563430065359,0.386758546405229,0.730730917258544,0.233151351633987,0.441146467773712,0"| __truncated__ ...
## $ imgLumB.madLst : chr "0.261635294117647,0.261635294117647,0.366289411764706,0.0639552941176471,0.156981176470588,0.145352941176471,0.156981176470588,"| __truncated__ "0.104654117647059,0.343032941176471,0.261635294117647,0.255821176470588,0.180237647058824,0.250007058823529,0.250007058823529,0"| __truncated__ "0.250007058823529,0.0465129411764706,0.0755835294117647,0.220936470588235,0.19768,0.232564705882353,0.331404705882353,0.2732635"| __truncated__ "0.122096470588235,0.0290705882352941,0.290705882352941,0.232564705882353,0.267449411764706,0.127910588235294,0.418616470588235,"| __truncated__ ...
## $ imgLumG.meanLst: chr "0.430285009150327,0.275434583006536,0.462405437908497,0.101890865359477,0.249514917647059,0.240608418300654,0.561854912418301,0"| __truncated__ "0.309533084726733,0.330929819607843,0.301213992156863,0.449378549996511,0.461475576470588,0.279948002173161,0.509639726220367,0"| __truncated__ "0.11533453815261,0.356775152941176,0.305095362091503,0.296019283660131,0.245902170120717,0.158195952941176,0.345485674412114,0."| __truncated__ "0.51544725751634,0.116746415686275,0.310151654901961,0.400247383006536,0.693268475687489,0.207954070588235,0.374621352847359,0."| __truncated__ ...
## $ imgLumG.madLst : chr "0.290705882352941,0.215122352941176,0.313962352941176,0.0290705882352941,0.0930258823529412,0.116282352941176,0.273263529411765"| __truncated__ "0.0813976470588235,0.220936470588235,0.180237647058824,0.29652,0.220936470588235,0.220936470588235,0.238378823529412,0.05814117"| __truncated__ "0.0406988235294118,0.0755835294117648,0.09884,0.220936470588235,0.151167058823529,0.116282352941176,0.325590588235294,0.3314047"| __truncated__ "0.127910588235294,0.0290705882352941,0.226750588235294,0.191865882352941,0.255821176470588,0.116282352941176,0.348847058823529,"| __truncated__ ...
## $ imgCorRBLst : chr "0.971848384633669,0.844142924409889,0.933626874258348,0.969810338166802,0.781499439147351,0.96029374022022,0.974561522049522,0."| __truncated__ "0.977826640295478,0.970093800087844,0.958117848866376,0.826425463623886,0.902990540155383,0.954669818445583,0.901772014521726,0"| __truncated__ "0.751064433278075,0.855668373119896,0.972904554828568,0.939937456381645,0.926619297264817,0.92939373950199,0.990492780325631,0."| __truncated__ "0.555820014464522,0.987192718776747,0.959839340799832,0.959920131196621,0.995094722698392,0.951215470717168,0.976819867721892,0"| __truncated__ ...
## $ imgCorBGLst : chr "0.966759517902915,0.838294605196521,0.926032937557564,0.690245987299617,0.927094333403652,0.981477624704281,0.948620748095784,0"| __truncated__ "0.979675281758249,0.931181159776226,0.976943530052098,0.922761873692084,0.979859433730912,0.978621329328549,0.94517909321353,0."| __truncated__ "0.800172977668431,0.940824315292693,0.983183564231686,0.966469004416202,0.948687307759127,0.916606116096075,0.989371346109249,0"| __truncated__ "0.963092912451779,0.986737806214834,0.987758595775795,0.944562407566799,0.992731818832544,0.966874821635512,0.977040624187135,0"| __truncated__ ...
## $ imgCorGRLst : chr "0.908233427536138,0.662158186456318,0.790033846396902,0.661825133591838,0.706251827780452,0.92733706786448,0.917050501395878,0."| __truncated__ "0.939796742137535,0.853767975534679,0.926302731451899,0.727291203433466,0.831679683293415,0.917666327515192,0.753552074869825,0"| __truncated__ "0.441646436230003,0.683247381338638,0.951309332579077,0.90104916738589,0.790516505951237,0.775943981411096,0.96706568094178,0.9"| __truncated__ "0.652874422660011,0.960026571284438,0.957548749840944,0.893443330148534,0.97897232994291,0.905843186755115,0.926313116437056,0."| __truncated__ ...
## $ imgCosSmlRBLst : chr "0.979683847698728,0.842423493063493,0.927650747647504,0.766240959072844,0.889905947268674,0.971297206146872,0.978981063917538,0"| __truncated__ "0.981250004820448,0.960622449715253,0.964180727999246,0.910476761778145,0.967889643398825,0.962016500076085,0.9465863743288,0.9"| __truncated__ "0.588207431002124,0.957670991272398,0.99119061669802,0.947822259934586,0.871121143446002,0.93765604224368,0.978932457985475,0.9"| __truncated__ "0.97594530245552,0.975293999039549,0.969604590075432,0.973708147249703,0.998046338257088,0.968532835059471,0.968690671623621,0."| __truncated__ ...
## $ imgCosSmlBGLst : chr "0.99233923888975,0.896735079767823,0.973423361287955,0.77963268894566,0.935925242505431,0.992467216100079,0.987106891004159,0.9"| __truncated__ "0.989000892129007,0.981571328444779,0.990248013460088,0.970851734862672,0.995231073626733,0.990796747634173,0.989820608945766,0"| __truncated__ "0.815204820449962,0.987659243028195,0.997049498593563,0.987396333282936,0.959653443396857,0.975695204785106,0.993840981552915,0"| __truncated__ "0.996816305782785,0.989285962130579,0.991925036311123,0.985214131962629,0.999186777449118,0.988717617452914,0.990680634693098,0"| __truncated__ ...
## $ imgCosSmlGRLst : chr "0.979683847698728,0.842423493063493,0.927650747647504,0.766240959072844,0.889905947268674,0.971297206146872,0.978981063917538,0"| __truncated__ "0.981250004820448,0.960622449715253,0.964180727999246,0.910476761778145,0.967889643398825,0.962016500076085,0.9465863743288,0.9"| __truncated__ "0.588207431002124,0.957670991272398,0.99119061669802,0.947822259934586,0.871121143446002,0.93765604224368,0.978932457985475,0.9"| __truncated__ "0.97594530245552,0.975293999039549,0.969604590075432,0.973708147249703,0.998046338257088,0.968532835059471,0.968690671623621,0."| __truncated__ ...
## - attr(*, "comment")= chr "glbObsNew"
## NULL
## [1] "Creating new feature: .pos..."
## [1] "Creating new feature: nImgs.log1p..."
## [1] "Creating new feature: nImgs.root2..."
## [1] "Creating new feature: nImgs.nexp..."
## [1] "Creating new feature: resX.mean..."
## [1] "Creating new feature: resX.mad..."
## [1] "Creating new feature: resX.mean.log1p..."
## [1] "Creating new feature: resX.mean.root2..."
## [1] "Creating new feature: resX.mean.nexp..."
## [1] "Creating new feature: resX.mad.log1p..."
## [1] "Creating new feature: resX.mad.root2..."
## [1] "Creating new feature: resX.mad.nexp..."
## [1] "Creating new feature: resY.mean..."
## [1] "Creating new feature: resY.mad..."
## [1] "Creating new feature: resY.mean.log1p..."
## [1] "Creating new feature: resY.mean.root2..."
## [1] "Creating new feature: resY.mean.nexp..."
## [1] "Creating new feature: resY.mad.log1p..."
## [1] "Creating new feature: resY.mad.root2..."
## [1] "Creating new feature: resY.mad.nexp..."
## [1] "Creating new feature: resXY.mean..."
## [1] "Creating new feature: resXY.mad..."
## [1] "Creating new feature: resXY.mean.log1p..."
## [1] "Creating new feature: resXY.mean.root2..."
## [1] "Creating new feature: resXY.mean.nexp..."
## [1] "Creating new feature: resXY.mad.log1p..."
## [1] "Creating new feature: resXY.mad.root2..."
## [1] "Creating new feature: resXY.mad.nexp..."
## [1] "Creating new feature: lumR.mean.mean..."
## [1] "Creating new feature: lumR.mean.mad..."
## [1] "Creating new feature: lumR.mad.mean..."
## [1] "Creating new feature: lumR.mad.mad..."
## [1] "Creating new feature: lumB.mean.mean..."
## [1] "Creating new feature: lumB.mean.mad..."
## [1] "Creating new feature: lumB.mad.mean..."
## [1] "Creating new feature: lumB.mad.mad..."
## [1] "Creating new feature: lumG.mean.mean..."
## [1] "Creating new feature: lumG.mean.mad..."
## [1] "Creating new feature: lumG.mad.mean..."
## [1] "Creating new feature: lumG.mad.mad..."
## [1] "Creating new feature: CorRB.mean..."
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## [1] "Creating new feature: CorRB.mad..."
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## [1] "Creating new feature: CorBG.mean..."
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## [1] "Creating new feature: CorBG.mad..."
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## [1] "Creating new feature: CorGR.mean..."
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mean(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## [1] "Creating new feature: CorGR.mad..."
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## Warning in mad(as.numeric(unlist(str_split(thsObsFeat, ","))), na.rm =
## TRUE): NAs introduced by coercion
## [1] "Creating new feature: CosSmlRB.mean..."
## [1] "Creating new feature: CosSmlRB.mad..."
## [1] "Creating new feature: CosSmlBG.mean..."
## [1] "Creating new feature: CosSmlBG.mad..."
## [1] "Creating new feature: CosSmlGR.mean..."
## [1] "Creating new feature: CosSmlGR.mad..."
## [1] "Creating new feature: lumG.mad.mean.cut.fctr..."
## [1] "Creating new feature: lunch..."
## [1] "Creating new feature: dinner..."
## [1] "Creating new feature: reserve..."
## [1] "Creating new feature: outdoor..."
## [1] "Creating new feature: expensive..."
## [1] "Creating new feature: liquor..."
## [1] "Creating new feature: table..."
## [1] "Creating new feature: classy..."
## [1] "Creating new feature: kids..."
## [1] "Creating new feature: nImgs.cut.fctr..."
## [1] "Partition stats:"
## Loading required package: sqldf
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
## Loading required package: tcltk
## outdoor .src .n
## 1 <NA> Test 10000
## 2 3 Train 1003
## 3 -1 Train 997
## outdoor .src .n
## 1 <NA> Test 10000
## 2 3 Train 1003
## 3 -1 Train 997
## Loading required package: RColorBrewer
## .src .n
## 1 Test 10000
## 2 Train 2000
## Loading required package: lazyeval
## Loading required package: gdata
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
## The following objects are masked from 'package:dplyr':
##
## combine, first, last
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## [1] "Found 0 duplicates by all features:"
## NULL
## label step_major step_minor label_minor bgn end elapsed
## 1 import.data 1 0 0 9.012 163.961 154.949
## 2 inspect.data 2 0 0 163.962 NA NA
2.0: inspect data
## Loading required package: reshape2
## outdoor.-1 outdoor.3 outdoor.NA
## Test NA NA 10000
## Train 997 1003 NA
## outdoor.-1 outdoor.3 outdoor.NA
## Test NA NA 1
## Train 0.4985 0.5015 NA
## [1] "numeric data missing in glbObsAll: "
## lunch dinner reserve outdoor expensive liquor table
## 10000 10000 10000 10000 10000 10000 10000
## classy kids
## 10000 10000
## [1] "numeric data w/ 0s in glbObsAll: "
## nImgs.nexp resX.mad resX.mad.log1p resX.mad.root2
## 228 9353 9353 9353
## resY.mad resY.mad.log1p resY.mad.root2 resXY.mad
## 5442 5442 5442 10915
## resXY.mean.nexp resXY.mad.log1p resXY.mad.root2 resXY.mad.nexp
## 12000 10915 10915 850
## lumR.mean.mad lumR.mad.mad lumB.mean.mad lumB.mad.mad
## 9 9 9 9
## lumG.mean.mad lumG.mad.mad CorRB.mad CorBG.mad
## 9 9 9 9
## CorGR.mad CosSmlRB.mad CosSmlBG.mad CosSmlGR.mad
## 9 9 9 9
## lunch
## 671
## [1] "numeric data w/ Infs in glbObsAll: "
## named integer(0)
## [1] "numeric data w/ NaNs in glbObsAll: "
## named integer(0)
## [1] "string data missing in glbObsAll: "
## business_id labels imgResXLst imgResYLst
## 0 NA 0 0
## imgResXYLst imgLumR.meanLst imgLumR.madLst imgLumB.meanLst
## 0 0 0 0
## imgLumB.madLst imgLumG.meanLst imgLumG.madLst imgCorRBLst
## 0 0 0 0
## imgCorBGLst imgCorGRLst imgCosSmlRBLst imgCosSmlBGLst
## 0 0 0 0
## imgCosSmlGRLst
## 0
## outdoor outdoor.fctr .n
## 1 <NA> <NA> 10000
## 2 3 Y 1003
## 3 -1 N 997
## Warning: Removed 1 rows containing missing values (position_stack).
## outdoor.fctr.N outdoor.fctr.Y outdoor.fctr.NA
## Test NA NA 10000
## Train 997 1003 NA
## outdoor.fctr.N outdoor.fctr.Y outdoor.fctr.NA
## Test NA NA 1
## Train 0.4985 0.5015 NA
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## NULL
## label step_major step_minor label_minor bgn end elapsed
## 2 inspect.data 2 0 0 163.962 207.933 43.971
## 3 scrub.data 2 1 1 207.934 NA NA
2.1: scrub data
## [1] "numeric data missing in glbObsAll: "
## lunch dinner reserve outdoor expensive
## 10000 10000 10000 10000 10000
## liquor table classy kids outdoor.fctr
## 10000 10000 10000 10000 10000
## [1] "numeric data w/ 0s in glbObsAll: "
## nImgs.nexp resX.mad resX.mad.log1p resX.mad.root2
## 228 9353 9353 9353
## resY.mad resY.mad.log1p resY.mad.root2 resXY.mad
## 5442 5442 5442 10915
## resXY.mean.nexp resXY.mad.log1p resXY.mad.root2 resXY.mad.nexp
## 12000 10915 10915 850
## lumR.mean.mad lumR.mad.mad lumB.mean.mad lumB.mad.mad
## 9 9 9 9
## lumG.mean.mad lumG.mad.mad CorRB.mad CorBG.mad
## 9 9 9 9
## CorGR.mad CosSmlRB.mad CosSmlBG.mad CosSmlGR.mad
## 9 9 9 9
## lunch
## 671
## [1] "numeric data w/ Infs in glbObsAll: "
## named integer(0)
## [1] "numeric data w/ NaNs in glbObsAll: "
## named integer(0)
## [1] "string data missing in glbObsAll: "
## business_id labels imgResXLst imgResYLst
## 0 NA 0 0
## imgResXYLst imgLumR.meanLst imgLumR.madLst imgLumB.meanLst
## 0 0 0 0
## imgLumB.madLst imgLumG.meanLst imgLumG.madLst imgCorRBLst
## 0 0 0 0
## imgCorBGLst imgCorGRLst imgCosSmlRBLst imgCosSmlBGLst
## 0 0 0 0
## imgCosSmlGRLst
## 0
## label step_major step_minor label_minor bgn end elapsed
## 3 scrub.data 2 1 1 207.934 220.921 12.988
## 4 transform.data 2 2 2 220.922 NA NA
2.2: transform data## label step_major step_minor label_minor bgn end
## 4 transform.data 2 2 2 220.922 220.995
## 5 extract.features 3 0 0 220.996 NA
## elapsed
## 4 0.074
## 5 NA
3.0: extract features## label step_major step_minor label_minor bgn
## 5 extract.features 3 0 0 220.996
## 6 extract.features.datetime 3 1 1 221.018
## end elapsed
## 5 221.017 0.022
## 6 NA NA
3.1: extract features datetime## label step_major step_minor label_minor bgn
## 1 extract.features.datetime.bgn 1 0 0 221.044
## end elapsed
## 1 NA NA
## label step_major step_minor label_minor bgn
## 6 extract.features.datetime 3 1 1 221.018
## 7 extract.features.image 3 2 2 221.055
## end elapsed
## 6 221.054 0.037
## 7 NA NA
3.2: extract features image## label step_major step_minor label_minor bgn end
## 1 extract.features.image.bgn 1 0 0 221.091 NA
## elapsed
## 1 NA
## label step_major step_minor label_minor bgn
## 1 extract.features.image.bgn 1 0 0 221.091
## 2 extract.features.image.end 2 0 0 221.101
## end elapsed
## 1 221.1 0.009
## 2 NA NA
## label step_major step_minor label_minor bgn
## 1 extract.features.image.bgn 1 0 0 221.091
## 2 extract.features.image.end 2 0 0 221.101
## end elapsed
## 1 221.1 0.009
## 2 NA NA
## label step_major step_minor label_minor bgn end
## 7 extract.features.image 3 2 2 221.055 221.111
## 8 extract.features.price 3 3 3 221.112 NA
## elapsed
## 7 0.056
## 8 NA
3.3: extract features price## label step_major step_minor label_minor bgn end
## 1 extract.features.price.bgn 1 0 0 221.14 NA
## elapsed
## 1 NA
## label step_major step_minor label_minor bgn end
## 8 extract.features.price 3 3 3 221.112 221.149
## 9 extract.features.text 3 4 4 221.149 NA
## elapsed
## 8 0.037
## 9 NA
3.4: extract features text## label step_major step_minor label_minor bgn end
## 1 extract.features.text.bgn 1 0 0 221.194 NA
## elapsed
## 1 NA
## label step_major step_minor label_minor bgn
## 9 extract.features.text 3 4 4 221.149
## 10 extract.features.string 3 5 5 221.204
## end elapsed
## 9 221.203 0.054
## 10 NA NA
3.5: extract features string## label step_major step_minor label_minor bgn
## 1 extract.features.string.bgn 1 0 0 221.235
## end elapsed
## 1 NA NA
## label step_major step_minor
## 1 extract.features.string.bgn 1 0
## 2 extract.features.stringfactorize.str.vars 2 0
## label_minor bgn end elapsed
## 1 0 221.235 221.245 0.01
## 2 0 221.245 NA NA
## business_id labels imgResXLst imgResYLst
## "business_id" "labels" "imgResXLst" "imgResYLst"
## imgResXYLst imgLumR.meanLst imgLumR.madLst imgLumB.meanLst
## "imgResXYLst" "imgLumR.meanLst" "imgLumR.madLst" "imgLumB.meanLst"
## imgLumB.madLst imgLumG.meanLst imgLumG.madLst imgCorRBLst
## "imgLumB.madLst" "imgLumG.meanLst" "imgLumG.madLst" "imgCorRBLst"
## imgCorBGLst imgCorGRLst imgCosSmlRBLst imgCosSmlBGLst
## "imgCorBGLst" "imgCorGRLst" "imgCosSmlRBLst" "imgCosSmlBGLst"
## imgCosSmlGRLst .src
## "imgCosSmlGRLst" ".src"
## label step_major step_minor label_minor bgn
## 10 extract.features.string 3 5 5 221.204
## 11 extract.features.end 3 6 6 221.262
## end elapsed
## 10 221.261 0.057
## 11 NA NA
3.6: extract features end## [1] "Summary for lunch:"
##
## -1 0 <NA>
## Test 0 0 10000
## Train 1329 671 0
## [1] "Summary for dinner:"
##
## -1 1 <NA>
## Test 0 0 10000
## Train 1007 993 0
## [1] "Summary for reserve:"
##
## -1 2 <NA>
## Test 0 0 10000
## Train 974 1026 0
## [1] "Summary for outdoor:"
##
## -1 3 <NA>
## Test 0 0 10000
## Train 997 1003 0
## [1] "Summary for expensive:"
##
## -1 4 <NA>
## Test 0 0 10000
## Train 1453 547 0
## [1] "Summary for liquor:"
##
## -1 5 <NA>
## Test 0 0 10000
## Train 751 1249 0
## [1] "Summary for table:"
##
## -1 6 <NA>
## Test 0 0 10000
## Train 640 1360 0
## [1] "Summary for classy:"
##
## -1 7 <NA>
## Test 0 0 10000
## Train 1428 572 0
## [1] "Summary for kids:"
##
## -1 8 <NA>
## Test 0 0 10000
## Train 762 1238 0
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
## label step_major step_minor label_minor bgn end
## 11 extract.features.end 3 6 6 221.262 222.239
## 12 manage.missing.data 4 0 0 222.240 NA
## elapsed
## 11 0.977
## 12 NA
4.0: manage missing data
## [1] "numeric data missing in glbObsAll: "
## lunch dinner reserve outdoor expensive
## 10000 10000 10000 10000 10000
## liquor table classy kids outdoor.fctr
## 10000 10000 10000 10000 10000
## [1] "numeric data w/ 0s in glbObsAll: "
## nImgs.nexp resX.mad resX.mad.log1p resX.mad.root2
## 228 9353 9353 9353
## resY.mad resY.mad.log1p resY.mad.root2 resXY.mad
## 5442 5442 5442 10915
## resXY.mean.nexp resXY.mad.log1p resXY.mad.root2 resXY.mad.nexp
## 12000 10915 10915 850
## lumR.mean.mad lumR.mad.mad lumB.mean.mad lumB.mad.mad
## 9 9 9 9
## lumG.mean.mad lumG.mad.mad CorRB.mad CorBG.mad
## 9 9 9 9
## CorGR.mad CosSmlRB.mad CosSmlBG.mad CosSmlGR.mad
## 9 9 9 9
## lunch
## 671
## [1] "numeric data w/ Infs in glbObsAll: "
## named integer(0)
## [1] "numeric data w/ NaNs in glbObsAll: "
## named integer(0)
## [1] "string data missing in glbObsAll: "
## business_id labels imgResXLst imgResYLst
## 0 NA 0 0
## imgResXYLst imgLumR.meanLst imgLumR.madLst imgLumB.meanLst
## 0 0 0 0
## imgLumB.madLst imgLumG.meanLst imgLumG.madLst imgCorRBLst
## 0 0 0 0
## imgCorBGLst imgCorGRLst imgCosSmlRBLst imgCosSmlBGLst
## 0 0 0 0
## imgCosSmlGRLst
## 0
## [1] "numeric data missing in glbObsAll: "
## lunch dinner reserve outdoor expensive
## 10000 10000 10000 10000 10000
## liquor table classy kids outdoor.fctr
## 10000 10000 10000 10000 10000
## [1] "numeric data w/ 0s in glbObsAll: "
## nImgs.nexp resX.mad resX.mad.log1p resX.mad.root2
## 228 9353 9353 9353
## resY.mad resY.mad.log1p resY.mad.root2 resXY.mad
## 5442 5442 5442 10915
## resXY.mean.nexp resXY.mad.log1p resXY.mad.root2 resXY.mad.nexp
## 12000 10915 10915 850
## lumR.mean.mad lumR.mad.mad lumB.mean.mad lumB.mad.mad
## 9 9 9 9
## lumG.mean.mad lumG.mad.mad CorRB.mad CorBG.mad
## 9 9 9 9
## CorGR.mad CosSmlRB.mad CosSmlBG.mad CosSmlGR.mad
## 9 9 9 9
## lunch
## 671
## [1] "numeric data w/ Infs in glbObsAll: "
## named integer(0)
## [1] "numeric data w/ NaNs in glbObsAll: "
## named integer(0)
## [1] "string data missing in glbObsAll: "
## business_id labels imgResXLst imgResYLst
## 0 NA 0 0
## imgResXYLst imgLumR.meanLst imgLumR.madLst imgLumB.meanLst
## 0 0 0 0
## imgLumB.madLst imgLumG.meanLst imgLumG.madLst imgCorRBLst
## 0 0 0 0
## imgCorBGLst imgCorGRLst imgCosSmlRBLst imgCosSmlBGLst
## 0 0 0 0
## imgCosSmlGRLst
## 0
## label step_major step_minor label_minor bgn end
## 12 manage.missing.data 4 0 0 222.240 222.726
## 13 cluster.data 5 0 0 222.727 NA
## elapsed
## 12 0.487
## 13 NA
5.0: cluster data## label step_major step_minor label_minor bgn
## 13 cluster.data 5 0 0 222.727
## 14 partition.data.training 6 0 0 222.815
## end elapsed
## 13 222.814 0.088
## 14 NA NA
6.0: partition data training
## [1] "partition.data.training chunk: setup: elapsed: 0.00 secs"
## [1] "partition.data.training chunk: strata_mtrx complete: elapsed: 0.15 secs"
## [1] "partition.data.training chunk: obs_freq_df complete: elapsed: 0.16 secs"
## Loading required package: sampling
##
## Attaching package: 'sampling'
## The following objects are masked from 'package:survival':
##
## cluster, strata
## The following object is masked from 'package:caret':
##
## cluster
## [1] "partition.data.training chunk: Fit/OOB partition complete: elapsed: 0.33 secs"
## outdoor.-1 outdoor.3 outdoor.NA
## NA NA 10000
## Fit 500 503 NA
## OOB 497 500 NA
## outdoor.-1 outdoor.3 outdoor.NA
## NA NA 1
## Fit 0.4985045 0.5014955 NA
## OOB 0.4984955 0.5015045 NA
## lumG.mad.mean.cut.fctr .n.Fit .n.OOB .n.Tst .freqRatio.Fit
## 1 (0.07,0.21] 367 365 2801 0.3659023
## 2 (0.21,0.22] 108 107 2591 0.1076770
## 4 (0.23,0.37] 412 411 2416 0.4107677
## 3 (0.22,0.23] 116 114 2192 0.1156530
## .freqRatio.OOB .freqRatio.Tst
## 1 0.3660983 0.2801
## 2 0.1073220 0.2591
## 4 0.4122367 0.2416
## 3 0.1143430 0.2192
## [1] "glbObsAll: "
## [1] 12000 85
## [1] "glbObsTrn: "
## [1] 2000 85
## [1] "glbObsFit: "
## [1] 1003 84
## [1] "glbObsOOB: "
## [1] 997 84
## [1] "glbObsNew: "
## [1] 10000 84
## [1] "partition.data.training chunk: teardown: elapsed: 2.06 secs"
## label step_major step_minor label_minor bgn
## 14 partition.data.training 6 0 0 222.815
## 15 select.features 7 0 0 224.932
## end elapsed
## 14 224.932 2.117
## 15 NA NA
7.0: select features
## Warning in cor(data.matrix(entity_df[, sel_feats]), y =
## as.numeric(entity_df[, : the standard deviation is zero
## [1] "cor(CosSmlGR.mean, CosSmlRB.mean)=1.0000"
## [1] "cor(outdoor.fctr, CosSmlGR.mean)=0.0210"
## [1] "cor(outdoor.fctr, CosSmlRB.mean)=0.0210"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified CosSmlRB.mean as highly correlated with
## CosSmlGR.mean
## [1] "cor(resX.mean.nexp, resY.mean.nexp)=1.0000"
## [1] "cor(outdoor.fctr, resX.mean.nexp)=-0.0224"
## [1] "cor(outdoor.fctr, resY.mean.nexp)=-0.0224"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified resY.mean.nexp as highly correlated with
## resX.mean.nexp
## [1] "cor(resX.mean, resX.mean.root2)=0.9996"
## [1] "cor(outdoor.fctr, resX.mean)=-0.0177"
## [1] "cor(outdoor.fctr, resX.mean.root2)=-0.0164"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified resX.mean.root2 as highly correlated with resX.mean
## [1] "cor(resY.mean, resY.mean.root2)=0.9995"
## [1] "cor(outdoor.fctr, resY.mean)=0.0126"
## [1] "cor(outdoor.fctr, resY.mean.root2)=0.0131"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified resY.mean as highly correlated with resY.mean.root2
## [1] "cor(resY.mean.log1p, resY.mean.root2)=0.9994"
## [1] "cor(outdoor.fctr, resY.mean.log1p)=0.0136"
## [1] "cor(outdoor.fctr, resY.mean.root2)=0.0131"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified resY.mean.root2 as highly correlated with
## resY.mean.log1p
## [1] "cor(resX.mean, resX.mean.log1p)=0.9985"
## [1] "cor(outdoor.fctr, resX.mean)=-0.0177"
## [1] "cor(outdoor.fctr, resX.mean.log1p)=-0.0151"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified resX.mean.log1p as highly correlated with resX.mean
## [1] "cor(resX.mad.log1p, resX.mad.root2)=0.9880"
## [1] "cor(outdoor.fctr, resX.mad.log1p)=0.0220"
## [1] "cor(outdoor.fctr, resX.mad.root2)=0.0219"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified resX.mad.root2 as highly correlated with
## resX.mad.log1p
## [1] "cor(resXY.mad.log1p, resXY.mad.nexp)=-0.9803"
## [1] "cor(outdoor.fctr, resXY.mad.log1p)=-0.0141"
## [1] "cor(outdoor.fctr, resXY.mad.nexp)=0.0154"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified resXY.mad.log1p as highly correlated with
## resXY.mad.nexp
## [1] "cor(resX.mad, resX.mad.log1p)=0.9375"
## [1] "cor(outdoor.fctr, resX.mad)=0.0205"
## [1] "cor(outdoor.fctr, resX.mad.log1p)=0.0220"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified resX.mad as highly correlated with resX.mad.log1p
## [1] "cor(resXY.mad, resXY.mad.root2)=0.9334"
## [1] "cor(outdoor.fctr, resXY.mad)=-0.0119"
## [1] "cor(outdoor.fctr, resXY.mad.root2)=-0.0114"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified resXY.mad.root2 as highly correlated with resXY.mad
## [1] "cor(resX.mad.log1p, resX.mad.nexp)=-0.9321"
## [1] "cor(outdoor.fctr, resX.mad.log1p)=0.0220"
## [1] "cor(outdoor.fctr, resX.mad.nexp)=-0.0140"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified resX.mad.nexp as highly correlated with
## resX.mad.log1p
## [1] "cor(nImgs.log1p, nImgs.root2)=0.9280"
## [1] "cor(outdoor.fctr, nImgs.log1p)=0.0473"
## [1] "cor(outdoor.fctr, nImgs.root2)=0.0140"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified nImgs.root2 as highly correlated with nImgs.log1p
## [1] "cor(nImgs.cut.fctr, nImgs.log1p)=0.9109"
## [1] "cor(outdoor.fctr, nImgs.cut.fctr)=0.0586"
## [1] "cor(outdoor.fctr, nImgs.log1p)=0.0473"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df
## = glbObsTrn, : Identified nImgs.log1p as highly correlated with
## nImgs.cut.fctr
## [1] "cor(lumB.mean.mean, lumG.mean.mean)=0.8988"
## [1] "cor(outdoor.fctr, lumB.mean.mean)=-0.0325"
## [1] "cor(outdoor.fctr, lumG.mean.mean)=0.0525"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified lumB.mean.mean as highly correlated with
## lumG.mean.mean
## [1] "cor(CosSmlBG.mean, CosSmlGR.mean)=0.8700"
## [1] "cor(outdoor.fctr, CosSmlBG.mean)=0.0300"
## [1] "cor(outdoor.fctr, CosSmlGR.mean)=0.0210"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified CosSmlGR.mean as highly correlated with
## CosSmlBG.mean
## [1] "cor(lumG.mad.mean, lumG.mad.mean.cut.fctr)=0.8678"
## [1] "cor(outdoor.fctr, lumG.mad.mean)=0.0751"
## [1] "cor(outdoor.fctr, lumG.mad.mean.cut.fctr)=0.0897"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified lumG.mad.mean as highly correlated with
## lumG.mad.mean.cut.fctr
## [1] "cor(CorRB.mad, CorRB.mean)=-0.8574"
## [1] "cor(outdoor.fctr, CorRB.mad)=0.0382"
## [1] "cor(outdoor.fctr, CorRB.mean)=-0.0096"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified CorRB.mean as highly correlated with CorRB.mad
## [1] "cor(CorBG.mean, CosSmlBG.mean)=0.8548"
## [1] "cor(outdoor.fctr, CorBG.mean)=0.0162"
## [1] "cor(outdoor.fctr, CosSmlBG.mean)=0.0300"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified CorBG.mean as highly correlated with CosSmlBG.mean
## [1] "cor(CosSmlBG.mad, CosSmlBG.mean)=-0.8236"
## [1] "cor(outdoor.fctr, CosSmlBG.mad)=-0.0462"
## [1] "cor(outdoor.fctr, CosSmlBG.mean)=0.0300"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified CosSmlBG.mean as highly correlated with
## CosSmlBG.mad
## [1] "cor(lumB.mean.mad, lumG.mean.mad)=0.7144"
## [1] "cor(outdoor.fctr, lumB.mean.mad)=0.0496"
## [1] "cor(outdoor.fctr, lumG.mean.mad)=0.0516"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified lumB.mean.mad as highly correlated with
## lumG.mean.mad
## cor.y exclude.as.feat cor.y.abs
## outdoor 1.000000000 1 1.000000000
## liquor 0.100416198 1 0.100416198
## lumG.mad.mean.cut.fctr 0.089659386 0 0.089659386
## lumG.mad.mean 0.075062422 0 0.075062422
## lumG.mad.mad 0.069818982 0 0.069818982
## nImgs.cut.fctr 0.058567974 0 0.058567974
## lumG.mean.mean 0.052492718 0 0.052492718
## lumG.mean.mad 0.051616272 0 0.051616272
## lumB.mean.mad 0.049622224 0 0.049622224
## nImgs.log1p 0.047250893 0 0.047250893
## reserve 0.038935338 1 0.038935338
## lumB.mad.mad 0.038630626 0 0.038630626
## CorRB.mad 0.038198961 0 0.038198961
## lumR.mad.mean 0.036065638 0 0.036065638
## CorGR.mad 0.030253289 0 0.030253289
## CosSmlBG.mean 0.030023372 0 0.030023372
## .pos 0.027497300 0 0.027497300
## resX.mad.log1p 0.022032870 0 0.022032870
## resX.mad.root2 0.021937317 0 0.021937317
## CosSmlGR.mean 0.021022718 0 0.021022718
## CosSmlRB.mean 0.021022718 0 0.021022718
## resX.mad 0.020537518 0 0.020537518
## lumB.mad.mean 0.019323904 0 0.019323904
## expensive 0.017228141 1 0.017228141
## CorBG.mean 0.016157691 0 0.016157691
## classy 0.015804825 1 0.015804825
## lumR.mean.mad 0.015642413 0 0.015642413
## resXY.mad.nexp 0.015437895 0 0.015437895
## nImgs.root2 0.014028124 0 0.014028124
## lumR.mad.mad 0.013705157 0 0.013705157
## resY.mean.log1p 0.013625190 0 0.013625190
## resY.mean.root2 0.013106506 0 0.013106506
## resY.mean 0.012599188 0 0.012599188
## resY.mad.nexp 0.012190340 0 0.012190340
## resY.mad 0.007630633 0 0.007630633
## CorGR.mean 0.004925319 0 0.004925319
## CorBG.mad 0.003604604 0 0.003604604
## resY.mad.root2 0.002557583 0 0.002557583
## resY.mad.log1p -0.001526058 0 0.001526058
## nImgs.nexp -0.003435316 0 0.003435316
## CosSmlGR.mad -0.003587615 0 0.003587615
## CosSmlRB.mad -0.003587615 0 0.003587615
## resXY.mean.log1p -0.004867571 0 0.004867571
## lunch -0.005308550 1 0.005308550
## resXY.mean.root2 -0.007039955 0 0.007039955
## .rnorm -0.008042720 0 0.008042720
## resXY.mean -0.009002880 0 0.009002880
## CorRB.mean -0.009617034 0 0.009617034
## resXY.mad.root2 -0.011364822 0 0.011364822
## resXY.mad -0.011946049 0 0.011946049
## resX.mad.nexp -0.014000008 0 0.014000008
## resXY.mad.log1p -0.014055066 0 0.014055066
## nImgs -0.014963676 0 0.014963676
## resX.mean.log1p -0.015059015 0 0.015059015
## resX.mean.root2 -0.016434019 0 0.016434019
## resX.mean -0.017726551 0 0.017726551
## resX.mean.nexp -0.022433472 0 0.022433472
## resY.mean.nexp -0.022433472 0 0.022433472
## lumB.mean.mean -0.032529239 0 0.032529239
## dinner -0.039980159 1 0.039980159
## CosSmlBG.mad -0.046206836 0 0.046206836
## table -0.055823041 1 0.055823041
## kids -0.075895168 1 0.075895168
## lumR.mean.mean -0.115393376 0 0.115393376
## resXY.mean.nexp NA 0 NA
## cor.high.X freqRatio percentUnique
## outdoor <NA> 1.006018 0.10
## liquor <NA> 1.663116 0.10
## lumG.mad.mean.cut.fctr <NA> 1.124317 0.20
## lumG.mad.mean lumG.mad.mean.cut.fctr 1.000000 96.00
## lumG.mad.mad <NA> 1.049261 2.40
## nImgs.cut.fctr <NA> 1.007737 0.20
## lumG.mean.mean <NA> 2.000000 99.95
## lumG.mean.mad <NA> 2.000000 99.95
## lumB.mean.mad lumG.mean.mad 1.000000 100.00
## nImgs.log1p nImgs.cut.fctr 1.033333 19.10
## reserve <NA> 1.053388 0.10
## lumB.mad.mad <NA> 1.022624 2.75
## CorRB.mad <NA> 1.000000 100.00
## lumR.mad.mean <NA> 1.142857 93.95
## CorGR.mad <NA> 1.000000 100.00
## CosSmlBG.mean CosSmlBG.mad 1.000000 99.70
## .pos <NA> 1.000000 100.00
## resX.mad.log1p <NA> 11.209677 7.55
## resX.mad.root2 resX.mad.log1p 11.209677 7.55
## CosSmlGR.mean CosSmlBG.mean 1.000000 99.70
## CosSmlRB.mean CosSmlGR.mean 1.000000 99.70
## resX.mad resX.mad.log1p 11.209677 7.55
## lumB.mad.mean <NA> 1.200000 92.35
## expensive <NA> 2.656307 0.10
## CorBG.mean CosSmlBG.mean 1.000000 99.90
## classy <NA> 2.496503 0.10
## lumR.mean.mad <NA> 2.000000 99.95
## resXY.mad.nexp <NA> 4.841317 0.30
## nImgs.root2 nImgs.log1p 1.033333 19.10
## lumR.mad.mad <NA> 1.020576 2.30
## resY.mean.log1p <NA> 1.666667 97.90
## resY.mean.root2 resY.mean.log1p 1.666667 97.85
## resY.mean resY.mean.root2 1.666667 98.15
## resY.mad.nexp <NA> 5.354497 9.05
## resY.mad <NA> 5.354497 9.05
## CorGR.mean <NA> 2.000000 99.95
## CorBG.mad <NA> 1.000000 100.00
## resY.mad.root2 <NA> 5.354497 9.05
## resY.mad.log1p <NA> 5.354497 9.05
## nImgs.nexp <NA> 1.193548 17.35
## CosSmlGR.mad <NA> 1.000000 100.00
## CosSmlRB.mad <NA> 1.000000 100.00
## resXY.mean.log1p <NA> 4.000000 90.80
## lunch <NA> 1.980626 0.10
## resXY.mean.root2 <NA> 6.000000 98.20
## .rnorm <NA> 1.000000 100.00
## resXY.mean <NA> 6.000000 98.55
## CorRB.mean CorRB.mad 1.000000 100.00
## resXY.mad.root2 resXY.mad 9.568047 4.35
## resXY.mad <NA> 9.568047 4.35
## resX.mad.nexp resX.mad.log1p 11.209677 7.55
## resXY.mad.log1p resXY.mad.nexp 9.568047 4.35
## nImgs <NA> 1.033333 19.10
## resX.mean.log1p resX.mean 2.000000 97.60
## resX.mean.root2 resX.mean 2.000000 97.45
## resX.mean <NA> 2.000000 97.75
## resX.mean.nexp <NA> 2.000000 97.75
## resY.mean.nexp resX.mean.nexp 1.666667 98.15
## lumB.mean.mean lumG.mean.mean 2.000000 99.95
## dinner <NA> 1.014099 0.10
## CosSmlBG.mad <NA> 1.000000 100.00
## table <NA> 2.125000 0.10
## kids <NA> 1.624672 0.10
## lumR.mean.mean <NA> 1.000000 100.00
## resXY.mean.nexp <NA> 0.000000 0.05
## zeroVar nzv is.cor.y.abs.low
## outdoor FALSE FALSE FALSE
## liquor FALSE FALSE FALSE
## lumG.mad.mean.cut.fctr FALSE FALSE FALSE
## lumG.mad.mean FALSE FALSE FALSE
## lumG.mad.mad FALSE FALSE FALSE
## nImgs.cut.fctr FALSE FALSE FALSE
## lumG.mean.mean FALSE FALSE FALSE
## lumG.mean.mad FALSE FALSE FALSE
## lumB.mean.mad FALSE FALSE FALSE
## nImgs.log1p FALSE FALSE FALSE
## reserve FALSE FALSE FALSE
## lumB.mad.mad FALSE FALSE FALSE
## CorRB.mad FALSE FALSE FALSE
## lumR.mad.mean FALSE FALSE FALSE
## CorGR.mad FALSE FALSE FALSE
## CosSmlBG.mean FALSE FALSE FALSE
## .pos FALSE FALSE FALSE
## resX.mad.log1p FALSE FALSE FALSE
## resX.mad.root2 FALSE FALSE FALSE
## CosSmlGR.mean FALSE FALSE FALSE
## CosSmlRB.mean FALSE FALSE FALSE
## resX.mad FALSE FALSE FALSE
## lumB.mad.mean FALSE FALSE FALSE
## expensive FALSE FALSE FALSE
## CorBG.mean FALSE FALSE FALSE
## classy FALSE FALSE FALSE
## lumR.mean.mad FALSE FALSE FALSE
## resXY.mad.nexp FALSE FALSE FALSE
## nImgs.root2 FALSE FALSE FALSE
## lumR.mad.mad FALSE FALSE FALSE
## resY.mean.log1p FALSE FALSE FALSE
## resY.mean.root2 FALSE FALSE FALSE
## resY.mean FALSE FALSE FALSE
## resY.mad.nexp FALSE FALSE FALSE
## resY.mad FALSE FALSE TRUE
## CorGR.mean FALSE FALSE TRUE
## CorBG.mad FALSE FALSE TRUE
## resY.mad.root2 FALSE FALSE TRUE
## resY.mad.log1p FALSE FALSE TRUE
## nImgs.nexp FALSE FALSE TRUE
## CosSmlGR.mad FALSE FALSE TRUE
## CosSmlRB.mad FALSE FALSE TRUE
## resXY.mean.log1p FALSE FALSE TRUE
## lunch FALSE FALSE TRUE
## resXY.mean.root2 FALSE FALSE TRUE
## .rnorm FALSE FALSE FALSE
## resXY.mean FALSE FALSE FALSE
## CorRB.mean FALSE FALSE FALSE
## resXY.mad.root2 FALSE FALSE FALSE
## resXY.mad FALSE FALSE FALSE
## resX.mad.nexp FALSE FALSE FALSE
## resXY.mad.log1p FALSE FALSE FALSE
## nImgs FALSE FALSE FALSE
## resX.mean.log1p FALSE FALSE FALSE
## resX.mean.root2 FALSE FALSE FALSE
## resX.mean FALSE FALSE FALSE
## resX.mean.nexp FALSE FALSE FALSE
## resY.mean.nexp FALSE FALSE FALSE
## lumB.mean.mean FALSE FALSE FALSE
## dinner FALSE FALSE FALSE
## CosSmlBG.mad FALSE FALSE FALSE
## table FALSE FALSE FALSE
## kids FALSE FALSE FALSE
## lumR.mean.mean FALSE FALSE FALSE
## resXY.mean.nexp TRUE TRUE NA
## Warning in myplot_scatter(plt_feats_df, "percentUnique", "freqRatio",
## colorcol_name = "nzv", : converting nzv to class:factor
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 34 rows containing missing values (geom_point).
## cor.y exclude.as.feat cor.y.abs cor.high.X freqRatio
## resXY.mean.nexp NA 0 NA <NA> 0
## percentUnique zeroVar nzv is.cor.y.abs.low
## resXY.mean.nexp 0.05 TRUE TRUE NA
## [1] "numeric data missing in glbObsAll: "
## lunch dinner reserve outdoor expensive
## 10000 10000 10000 10000 10000
## liquor table classy kids outdoor.fctr
## 10000 10000 10000 10000 10000
## [1] "numeric data w/ 0s in glbObsAll: "
## nImgs.nexp resX.mad resX.mad.log1p resX.mad.root2
## 228 9353 9353 9353
## resY.mad resY.mad.log1p resY.mad.root2 resXY.mad
## 5442 5442 5442 10915
## resXY.mean.nexp resXY.mad.log1p resXY.mad.root2 resXY.mad.nexp
## 12000 10915 10915 850
## lumR.mean.mad lumR.mad.mad lumB.mean.mad lumB.mad.mad
## 9 9 9 9
## lumG.mean.mad lumG.mad.mad CorRB.mad CorBG.mad
## 9 9 9 9
## CorGR.mad CosSmlRB.mad CosSmlBG.mad CosSmlGR.mad
## 9 9 9 9
## lunch
## 671
## [1] "numeric data w/ Infs in glbObsAll: "
## named integer(0)
## [1] "numeric data w/ NaNs in glbObsAll: "
## named integer(0)
## [1] "string data missing in glbObsAll: "
## business_id labels imgResXLst imgResYLst
## 0 NA 0 0
## imgResXYLst imgLumR.meanLst imgLumR.madLst imgLumB.meanLst
## 0 0 0 0
## imgLumB.madLst imgLumG.meanLst imgLumG.madLst imgCorRBLst
## 0 0 0 0
## imgCorBGLst imgCorGRLst imgCosSmlRBLst imgCosSmlBGLst
## 0 0 0 0
## imgCosSmlGRLst .lcn
## 0 10000
## [1] "glb_feats_df:"
## [1] 65 12
## id exclude.as.feat rsp_var
## outdoor.fctr outdoor.fctr TRUE TRUE
## id cor.y exclude.as.feat cor.y.abs cor.high.X
## outdoor outdoor 1 TRUE 1 <NA>
## outdoor.fctr outdoor.fctr NA TRUE NA <NA>
## freqRatio percentUnique zeroVar nzv is.cor.y.abs.low
## outdoor 1.006018 0.1 FALSE FALSE FALSE
## outdoor.fctr NA NA NA NA NA
## interaction.feat shapiro.test.p.value rsp_var_raw id_var
## outdoor NA NA TRUE NA
## outdoor.fctr NA NA NA NA
## rsp_var
## outdoor NA
## outdoor.fctr TRUE
## [1] "glb_feats_df vs. glbObsAll: "
## character(0)
## [1] "glbObsAll vs. glb_feats_df: "
## character(0)
## label step_major step_minor label_minor bgn end
## 15 select.features 7 0 0 224.932 228.228
## 16 fit.models 8 0 0 228.228 NA
## elapsed
## 15 3.296
## 16 NA
8.0: fit models
fit.models_0_chunk_df <- myadd_chunk(NULL, "fit.models_0_bgn", label.minor = "setup")
## label step_major step_minor label_minor bgn end elapsed
## 1 fit.models_0_bgn 1 0 setup 228.867 NA NA
# load(paste0(glbOut$pfx, "dsk.RData"))
get_model_sel_frmla <- function() {
    # Build the formula used to rank fitted models in glb_models_df.
    # Metrics whose name contains "max" sort descending (prefixed "-"),
    # all others ascending ("+"); binomial classifiers additionally break
    # ties on the OOB probability threshold (descending).
    # Only metrics actually present as columns of glb_models_df are used
    # (e.g. min.aic.fit might not be available).
    avl_metrics <- glbMdlMetricsEval[glbMdlMetricsEval %in% names(glb_models_df)]
    term_pairs <- lapply(avl_metrics, function(metric) {
        direction <- if (grepl("max", metric)) "-" else "+"
        c(direction, metric)
    })
    sel_terms <- unlist(term_pairs)
    if (glb_is_classification && glb_is_binomial) {
        sel_terms <- c(sel_terms, "-", "opt.prob.threshold.OOB")
    }
    as.formula(paste(c("~ ", sel_terms), collapse = " "))
}
get_dsp_models_df <- function() {
    # Assemble a display data frame of all fitted models, ordered by the
    # model-selection formula, flag cross-validation problems, and write a
    # grid of bestTune plots to <glbOut$pfx>bestTune.png.
    #
    # Returns: glb_models_df rows ordered by get_model_sel_frmla(),
    #   restricted to the id / evaluation-metric / "opt."-prefixed columns,
    #   with row names set to the model ids.
    dsp_models_cols <- c("id",
        glbMdlMetricsEval[glbMdlMetricsEval %in% names(glb_models_df)],
        grep("opt.", names(glb_models_df), fixed = TRUE, value = TRUE))
    dsp_models_df <-
        orderBy(get_model_sel_frmla(), glb_models_df)[, dsp_models_cols]

    # Rows in each model's CV results vs. number of tunable parameters; a
    # model whose results table is not larger than its parameter count did
    # not really get cross-validated.
    nCvMdl <- vapply(glb_models_lst, function(mdl) nrow(mdl$results),
                     integer(1))
    nParams <- vapply(glb_models_lst, function(mdl)
        if (mdl$method == "custom") 0L else
            nrow(subset(modelLookup(mdl$method), parameter != "parameter")),
        integer(1))
    if (length(cvMdlProblems <- nCvMdl[nCvMdl <= nParams]) > 0) {
        print("Cross Validation issues:")
        warning("Cross Validation issues:")
        print(cvMdlProblems)
    }

    # Plot bestTune results for models that cross-validated cleanly and have
    # at least one tunable parameter, laid out two plots per row.
    pltMdls <- setdiff(names(nCvMdl), names(cvMdlProblems))
    pltMdls <- setdiff(pltMdls, names(nParams[nParams == 0]))
    png(paste0(glbOut$pfx, "bestTune.png"), width = 480 * 2, height = 480 * 4)
    # Close the device even if a plot errors below (previously a bare
    # dev.off() at the end leaked the device on error).
    on.exit(dev.off(), add = TRUE)
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(ceiling(length(pltMdls) / 2.0), 2)))
    pltIx <- 1
    for (mdlId in pltMdls) {
        print(ggplot(glb_models_lst[[mdlId]], highBestTune = TRUE) +
                  labs(title = mdlId),
              vp = viewport(layout.pos.row = ceiling(pltIx / 2.0),
                            layout.pos.col = ((pltIx - 1) %% 2) + 1))
        pltIx <- pltIx + 1
    }

    # NOTE(review): all(... != ...) only renames when EVERY row name differs
    # from its id; kept as-is to preserve behavior — confirm any(...) was not
    # intended here.
    if (all(row.names(dsp_models_df) != dsp_models_df$id))
        row.names(dsp_models_df) <- dsp_models_df$id
    return(dsp_models_df)
}
#get_dsp_models_df()
# Guard: a binomial classification fit is meaningless if the fit partition
# collapsed to a single outcome level; fail fast with the offending values.
if (glb_is_classification && glb_is_binomial &&
(length(unique(glbObsFit[, glb_rsp_var])) < 2))
stop("glbObsFit$", glb_rsp_var, ": contains less than 2 unique values: ",
paste0(unique(glbObsFit[, glb_rsp_var]), collapse=", "))
# Pick the two usable features most correlated (in absolute value) with the
# response: not excluded as a feature, not near-zero-variance, not flagged as
# low-correlation, and not marked as highly correlated with another retained
# feature (cor.high.X is NA).
max_cor_y_x_vars <- orderBy(~ -cor.y.abs,
subset(glb_feats_df, (exclude.as.feat == 0) & !nzv & !is.cor.y.abs.low &
is.na(cor.high.X)))[1:2, "id"]
max_cor_y_x_vars <- max_cor_y_x_vars[!is.na(max_cor_y_x_vars)]
# Fewer than two candidates survived: pad with the row-position feature so
# downstream two-variable models still get a pair.
if (length(max_cor_y_x_vars) < 2)
max_cor_y_x_vars <- union(max_cor_y_x_vars, ".pos")
# Sanity check: the configured baseline variable should be the feature with
# the highest absolute correlation to the response; otherwise stop.
if (!is.null(glb_Baseline_mdl_var)) {
if ((max_cor_y_x_vars[1] != glb_Baseline_mdl_var) &
(glb_feats_df[glb_feats_df$id == max_cor_y_x_vars[1], "cor.y.abs"] >
glb_feats_df[glb_feats_df$id == glb_Baseline_mdl_var, "cor.y.abs"]))
stop(max_cor_y_x_vars[1], " has a higher correlation with ", glb_rsp_var,
" than the Baseline var: ", glb_Baseline_mdl_var)
}
# NOTE(review): scalar condition — a plain if/else would be more idiomatic
# than ifelse() here.
glb_model_type <- ifelse(glb_is_regression, "regression", "classification")
# Model specs
# Reference list of the model-specification keys accepted by
# myinit_mdl_specs_lst / myfit_mdl (echoed in the rendered output below).
c("id.prefix", "method", "type",
# trainControl params
"preProc.method", "cv.n.folds", "cv.n.repeats", "summary.fn",
# train params
"metric", "metric.maximize", "tune.df")
## [1] "id.prefix" "method" "type"
## [4] "preProc.method" "cv.n.folds" "cv.n.repeats"
## [7] "summary.fn" "metric" "metric.maximize"
## [10] "tune.df"
# Baseline
# Fit the user-designated baseline model first; skipped entirely when no
# baseline variable was configured.
if (!is.null(glb_Baseline_mdl_var)) {
fit.models_0_chunk_df <- myadd_chunk(fit.models_0_chunk_df,
paste0("fit.models_0_", "Baseline"), major.inc = FALSE,
label.minor = "mybaseln_classfr")
ret_lst <- myfit_mdl(mdl_id="Baseline",
model_method="mybaseln_classfr",
indep_vars_vctr=glb_Baseline_mdl_var,
rsp_var=glb_rsp_var,
fit_df=glbObsFit, OOB_df=glbObsOOB)
}
# Most Frequent Outcome "MFO" model: mean(y) for regression
# Not using caret's nullModel since model stats not avl
# Cannot use rpart for multinomial classification since it predicts non-MFO
if (glb_is_classification) {
# Fit the MFO benchmark on the fit partition, evaluated on the OOB
# partition; .rnorm is a noise predictor so the model has a formula but no
# real signal.
fit.models_0_chunk_df <- myadd_chunk(fit.models_0_chunk_df,
paste0("fit.models_0_", "MFO"), major.inc = FALSE,
label.minor = "myMFO_classfr")
ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
id.prefix = "MFO", type = glb_model_type, trainControl.method = "none",
train.method = ifelse(glb_is_regression, "lm", "myMFO_classfr"))),
indep_vars = ".rnorm", rsp_var = glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
# "random" model - only for classification;
# none needed for regression since it is same as MFO
fit.models_0_chunk_df <- myadd_chunk(fit.models_0_chunk_df,
paste0("fit.models_0_", "Random"), major.inc = FALSE,
label.minor = "myrandom_classfr")
#stop(here"); glb2Sav(); all.equal(glb_models_df, sav_models_df)
ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
id.prefix = "Random", type = glb_model_type, trainControl.method = "none",
train.method = "myrandom_classfr")),
indep_vars = ".rnorm", rsp_var = glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
}
## label step_major step_minor label_minor bgn end
## 1 fit.models_0_bgn 1 0 setup 228.867 228.905
## 2 fit.models_0_MFO 1 1 myMFO_classfr 228.906 NA
## elapsed
## 1 0.038
## 2 NA
## [1] "myfit_mdl: enter: 0.000000 secs"
## [1] "fitting model: MFO###myMFO_classfr"
## [1] " indep_vars: .rnorm"
## [1] "myfit_mdl: setup complete: 0.628000 secs"
## Fitting parameter = none on full training set
## [1] "in MFO.Classifier$fit"
## [1] "unique.vals:"
## [1] N Y
## Levels: N Y
## [1] "unique.prob:"
## y
## Y N
## 0.5014955 0.4985045
## [1] "MFO.val:"
## [1] "Y"
## [1] "myfit_mdl: train complete: 1.226000 secs"
## Length Class Mode
## unique.vals 2 factor numeric
## unique.prob 2 -none- numeric
## MFO.val 1 -none- character
## x.names 1 -none- character
## xNames 1 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] "myfit_mdl: train diagnostics complete: 1.228000 secs"
## Loading required namespace: pROC
## [1] "entr MFO.Classifier$predict"
## [1] "exit MFO.Classifier$predict"
## Loading required package: ROCR
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
## [1] "in MFO.Classifier$prob"
## N Y
## 1 0.5014955 0.4985045
## 2 0.5014955 0.4985045
## 3 0.5014955 0.4985045
## 4 0.5014955 0.4985045
## 5 0.5014955 0.4985045
## 6 0.5014955 0.4985045
## Prediction
## Reference N Y
## N 0 500
## Y 0 503
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.014955e-01 0.000000e+00 4.700881e-01 5.328941e-01 5.014955e-01
## AccuracyPValue McnemarPValue
## 5.126062e-01 2.586405e-110
## [1] "entr MFO.Classifier$predict"
## [1] "exit MFO.Classifier$predict"
## [1] "in MFO.Classifier$prob"
## N Y
## 1 0.5014955 0.4985045
## 2 0.5014955 0.4985045
## 3 0.5014955 0.4985045
## 4 0.5014955 0.4985045
## 5 0.5014955 0.4985045
## 6 0.5014955 0.4985045
## Prediction
## Reference N Y
## N 0 497
## Y 0 500
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.015045e-01 0.000000e+00 4.700015e-01 5.329987e-01 5.015045e-01
## AccuracyPValue McnemarPValue
## 5.126442e-01 1.162632e-109
## [1] "myfit_mdl: predict complete: 4.337000 secs"
## id feats max.nTuningRuns min.elapsedtime.everything
## 1 MFO###myMFO_classfr .rnorm 0 0.591
## min.elapsedtime.final max.AUCpROC.fit max.Sens.fit max.Spec.fit
## 1 0.003 0.5 0 1
## max.AUCROCR.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0.4 0.6679947 0.5014955
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.4700881 0.5328941 0
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5 0 1 0.5
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.4 0.6680027 0.5015045
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.4700015 0.5329987 0
## [1] "myfit_mdl: exit: 4.345000 secs"
## label step_major step_minor label_minor bgn
## 2 fit.models_0_MFO 1 1 myMFO_classfr 228.906
## 3 fit.models_0_Random 1 2 myrandom_classfr 233.258
## end elapsed
## 2 233.257 4.351
## 3 NA NA
## [1] "myfit_mdl: enter: 0.000000 secs"
## [1] "fitting model: Random###myrandom_classfr"
## [1] " indep_vars: .rnorm"
## [1] "myfit_mdl: setup complete: 0.451000 secs"
## Fitting parameter = none on full training set
## [1] "myfit_mdl: train complete: 0.814000 secs"
## Length Class Mode
## unique.vals 2 factor numeric
## unique.prob 2 table numeric
## xNames 1 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] "myfit_mdl: train diagnostics complete: 0.815000 secs"
## [1] "in Random.Classifier$prob"
## Prediction
## Reference N Y
## N 0 500
## Y 0 503
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.014955e-01 0.000000e+00 4.700881e-01 5.328941e-01 5.014955e-01
## AccuracyPValue McnemarPValue
## 5.126062e-01 2.586405e-110
## [1] "in Random.Classifier$prob"
## Prediction
## Reference N Y
## N 0 497
## Y 0 500
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.015045e-01 0.000000e+00 4.700015e-01 5.329987e-01 5.015045e-01
## AccuracyPValue McnemarPValue
## 5.126442e-01 1.162632e-109
## [1] "myfit_mdl: predict complete: 4.479000 secs"
## id feats max.nTuningRuns
## 1 Random###myrandom_classfr .rnorm 0
## min.elapsedtime.everything min.elapsedtime.final max.AUCpROC.fit
## 1 0.359 0.002 0.5333718
## max.Sens.fit max.Spec.fit max.AUCROCR.fit opt.prob.threshold.fit
## 1 0.476 0.4572565 0.5014672 0.4
## max.f.score.fit max.Accuracy.fit max.AccuracyLower.fit
## 1 0.6679947 0.5014955 0.4700881
## max.AccuracyUpper.fit max.Kappa.fit max.AUCpROC.OOB max.Sens.OOB
## 1 0.5328941 0 0.508497 0.498994
## max.Spec.OOB max.AUCROCR.OOB opt.prob.threshold.OOB max.f.score.OOB
## 1 0.518 0.4974668 0.4 0.6680027
## max.Accuracy.OOB max.AccuracyLower.OOB max.AccuracyUpper.OOB
## 1 0.5015045 0.4700015 0.5329987
## max.Kappa.OOB
## 1 0
## [1] "myfit_mdl: exit: 4.491000 secs"
# Max.cor.Y
# Check impact of cv
# rpart is not a good candidate since caret does not optimize cp (only tuning parameter of rpart) well
# Start the timing/bookkeeping chunk for the Max.cor.Y repeated-cv sweep
# ("*X*" = folds X repeats grid explored below when glbMdlCheckRcv is TRUE).
fit.models_0_chunk_df <- myadd_chunk(fit.models_0_chunk_df,
paste0("fit.models_0_", "Max.cor.Y.rcv.*X*"), major.inc = FALSE,
label.minor = "glmnet")
## label step_major step_minor label_minor
## 3 fit.models_0_Random 1 2 myrandom_classfr
## 4 fit.models_0_Max.cor.Y.rcv.*X* 1 3 glmnet
## bgn end elapsed
## 3 233.258 237.76 4.502
## 4 237.760 NA NA
# Reference fit: glmnet on the max-correlation-with-Y features with NO
# resampling ("rcv.1X1"); the cv sweep below is compared against this.
ret_lst <- myfit_mdl(mdl_specs_lst=myinit_mdl_specs_lst(mdl_specs_lst=list(
id.prefix="Max.cor.Y.rcv.1X1", type=glb_model_type, trainControl.method="none",
train.method="glmnet")),
indep_vars=max_cor_y_x_vars, rsp_var=glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
## [1] "myfit_mdl: enter: 0.000000 secs"
## [1] "fitting model: Max.cor.Y.rcv.1X1###glmnet"
## [1] " indep_vars: lumR.mean.mean,lumG.mad.mean.cut.fctr"
## [1] "myfit_mdl: setup complete: 0.710000 secs"
## Loading required package: glmnet
## Loading required package: Matrix
## Loaded glmnet 2.0-2
## Fitting alpha = 0.1, lambda = 0.00141 on full training set
## [1] "myfit_mdl: train complete: 1.707000 secs"
## Length Class Mode
## a0 60 -none- numeric
## beta 240 dgCMatrix S4
## df 60 -none- numeric
## dim 2 -none- numeric
## lambda 60 -none- numeric
## dev.ratio 60 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 4 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept) lumG.mad.mean.cut.fctr(0.21,0.22]
## 3.7245809 0.3985227
## lumG.mad.mean.cut.fctr(0.22,0.23] lumG.mad.mean.cut.fctr(0.23,0.37]
## 0.4308500 0.8876484
## lumR.mean.mean
## -8.4289184
## [1] "max lambda < lambdaOpt:"
## [1] "Feats mismatch between coefs_left & rght:"
## [1] "(Intercept)" "lumG.mad.mean.cut.fctr(0.21,0.22]"
## [3] "lumG.mad.mean.cut.fctr(0.22,0.23]" "lumG.mad.mean.cut.fctr(0.23,0.37]"
## [5] "lumR.mean.mean"
## [1] "myfit_mdl: train diagnostics complete: 2.065000 secs"
## Prediction
## Reference N Y
## N 25 475
## Y 12 491
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.144566e-01 2.621553e-02 4.830239e-01 5.458043e-01 5.014955e-01
## AccuracyPValue McnemarPValue
## 2.149528e-01 2.558900e-97
## Prediction
## Reference N Y
## N 3 494
## Y 1 499
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.035105e-01 4.048264e-03 4.720022e-01 5.349981e-01 5.015045e-01
## AccuracyPValue McnemarPValue
## 4.621685e-01 2.330478e-108
## [1] "myfit_mdl: predict complete: 6.179000 secs"
## id feats
## 1 Max.cor.Y.rcv.1X1###glmnet lumR.mean.mean,lumG.mad.mean.cut.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 0 0.991 0.02
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.5901332 0.558 0.6222664 0.6302982
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.3 0.668482 0.5144566
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.4830239 0.5458043 0.02621553
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5736479 0.5492958 0.598 0.6006841
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.2 0.6684528 0.5035105
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.4720022 0.5349981 0.004048264
## [1] "myfit_mdl: exit: 6.192000 secs"
# Optional experiment: sweep repeated-cv fold/repeat combinations for the
# Max.cor.Y glmnet model and compare them on a parallel-coordinates plot.
if (glbMdlCheckRcv) {
# rcv_n_folds == 1 & rcv_n_repeats > 1 crashes
# Odd-step grids: folds in {3, 5, ..., glb_rcv_n_folds + 2},
# repeats in {1, 3, ..., glb_rcv_n_repeats + 2}.
for (rcv_n_folds in seq(3, glb_rcv_n_folds + 2, 2))
for (rcv_n_repeats in seq(1, glb_rcv_n_repeats + 2, 2)) {
# Experiment specific code to avoid caret crash
# lcl_tune_models_df <- rbind(data.frame()
# ,data.frame(method = "glmnet", parameter = "alpha",
# vals = "0.100 0.325 0.550 0.775 1.000")
# ,data.frame(method = "glmnet", parameter = "lambda",
# vals = "9.342e-02")
# )
# Model id encodes the grid point, e.g. "Max.cor.Y.rcv.3X1".
ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst =
list(
id.prefix = paste0("Max.cor.Y.rcv.", rcv_n_folds, "X", rcv_n_repeats),
type = glb_model_type,
# tune.df = lcl_tune_models_df,
trainControl.method = "repeatedcv",
trainControl.number = rcv_n_folds,
trainControl.repeats = rcv_n_repeats,
trainControl.classProbs = glb_is_classification,
trainControl.summaryFunction = glbMdlMetricSummaryFn,
train.method = "glmnet", train.metric = glbMdlMetricSummary,
train.maximize = glbMdlMetricMaximize)),
indep_vars = max_cor_y_x_vars, rsp_var = glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
}
# Add parallel coordinates graph of glb_models_df[, glbMdlMetricsEval] to evaluate cv parameters
# Columns: id, tuning-run count, the eval metrics present in glb_models_df,
# plus any literal "opt." columns (fixed = TRUE => not a regex dot).
tmp_models_cols <- c("id", "max.nTuningRuns",
glbMdlMetricsEval[glbMdlMetricsEval %in% names(glb_models_df)],
grep("opt.", names(glb_models_df), fixed = TRUE, value = TRUE))
print(myplot_parcoord(obs_df = subset(glb_models_df,
grepl("Max.cor.Y.rcv.", id, fixed = TRUE),
select = -feats)[, tmp_models_cols],
id_var = "id"))
}
# Useful for stacking decisions
# fit.models_0_chunk_df <- myadd_chunk(fit.models_0_chunk_df,
# paste0("fit.models_0_", "Max.cor.Y[rcv.1X1.cp.0|]"), major.inc = FALSE,
# label.minor = "rpart")
#
# ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
# id.prefix = "Max.cor.Y.rcv.1X1.cp.0", type = glb_model_type, trainControl.method = "none",
# train.method = "rpart",
# tune.df=data.frame(method="rpart", parameter="cp", min=0.0, max=0.0, by=0.1))),
# indep_vars=max_cor_y_x_vars, rsp_var=glb_rsp_var,
# fit_df=glbObsFit, OOB_df=glbObsOOB)
#stop(here"); glb2Sav(); all.equal(glb_models_df, sav_models_df)
# if (glb_is_regression || glb_is_binomial) # For multinomials this model will be run next by default
# Max.cor.Y with rpart under full repeated cv (the production resampling
# settings, unlike the "none"/sweep variants above).
ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
id.prefix = "Max.cor.Y",
type = glb_model_type, trainControl.method = "repeatedcv",
trainControl.number = glb_rcv_n_folds,
trainControl.repeats = glb_rcv_n_repeats,
trainControl.classProbs = glb_is_classification,
trainControl.summaryFunction = glbMdlMetricSummaryFn,
trainControl.allowParallel = glbMdlAllowParallel,
train.metric = glbMdlMetricSummary,
train.maximize = glbMdlMetricMaximize,
train.method = "rpart")),
indep_vars = max_cor_y_x_vars, rsp_var = glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
## [1] "myfit_mdl: enter: 0.000000 secs"
## [1] "fitting model: Max.cor.Y##rcv#rpart"
## [1] " indep_vars: lumR.mean.mean,lumG.mad.mean.cut.fctr"
## [1] "myfit_mdl: setup complete: 0.773000 secs"
## Loading required package: rpart
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.052 on full training set
## [1] "myfit_mdl: train complete: 2.627000 secs"
## Loading required package: rpart.plot
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 1003
##
## CP nsplit rel error
## 1 0.088 0 1.000
## 2 0.052 1 0.912
##
## Variable importance
## lumR.mean.mean
## 100
##
## Node number 1: 1003 observations, complexity param=0.088
## predicted class=Y expected loss=0.4985045 P(node) =1
## class counts: 500 503
## probabilities: 0.499 0.501
## left son=2 (90 obs) right son=3 (913 obs)
## Primary splits:
## lumR.mean.mean < 0.5703696 to the right, improve=1.196082e+01, (0 missing)
## lumG.mad.mean.cut.fctr(0.23,0.37] < 0.5 to the left, improve=3.767194e+00, (0 missing)
## lumG.mad.mean.cut.fctr(0.22,0.23] < 0.5 to the right, improve=9.210017e-02, (0 missing)
## lumG.mad.mean.cut.fctr(0.21,0.22] < 0.5 to the right, improve=5.413926e-04, (0 missing)
##
## Node number 2: 90 observations
## predicted class=N expected loss=0.2555556 P(node) =0.08973081
## class counts: 67 23
## probabilities: 0.744 0.256
##
## Node number 3: 913 observations
## predicted class=Y expected loss=0.4742607 P(node) =0.9102692
## class counts: 433 480
## probabilities: 0.474 0.526
##
## n= 1003
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 1003 500 Y (0.4985045 0.5014955)
## 2) lumR.mean.mean>=0.5703696 90 23 N (0.7444444 0.2555556) *
## 3) lumR.mean.mean< 0.5703696 913 433 Y (0.4742607 0.5257393) *
## [1] "myfit_mdl: train diagnostics complete: 3.977000 secs"
## Prediction
## Reference N Y
## N 67 433
## Y 23 480
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.453639e-01 8.849074e-02 5.139546e-01 5.765062e-01 5.014955e-01
## AccuracyPValue McnemarPValue
## 2.990863e-03 9.105337e-82
## Prediction
## Reference N Y
## N 0 497
## Y 0 500
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.015045e-01 0.000000e+00 4.700015e-01 5.329987e-01 5.015045e-01
## AccuracyPValue McnemarPValue
## 5.126442e-01 1.162632e-109
## [1] "myfit_mdl: predict complete: 7.140000 secs"
## id feats
## 1 Max.cor.Y##rcv#rpart lumR.mean.mean,lumG.mad.mean.cut.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 5 1.849 0.015
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.5441372 0.134 0.9542744 0.5441372
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0.6779661 0.5569916
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.5139546 0.5765062 0.1133789
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5152716 0.09054326 0.94 0.5152716
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.2 0.6680027 0.5015045
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.4700015 0.5329987 0
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.02065107 0.04176629
## [1] "myfit_mdl: exit: 7.155000 secs"
# Max.cor.Y + datetime polynomial terms: runs only when datetime features
# exist AND upstream generated <feat>.day.minutes.poly.* columns.
# The selection regex is built once (paste0 replaces paste(..., sep = ""))
# and reused for both the guard and the feature grep.
dtPolyRegex <- paste0(names(glbFeatsDateTime), "\\.day\\.minutes\\.poly\\.")
if ((length(glbFeatsDateTime) > 0) &&
(sum(grepl(dtPolyRegex, names(glbObsAll))) > 0)) {
fit.models_0_chunk_df <- myadd_chunk(fit.models_0_chunk_df,
paste0("fit.models_0_", "Max.cor.Y.Time.Poly"), major.inc = FALSE,
label.minor = "glmnet")
indepVars <- c(max_cor_y_x_vars,
grep(dtPolyRegex, names(glbObsAll), value = TRUE))
indepVars <- myadjust_interaction_feats(indepVars)
ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
id.prefix = "Max.cor.Y.Time.Poly",
type = glb_model_type, trainControl.method = "repeatedcv",
trainControl.number = glb_rcv_n_folds, trainControl.repeats = glb_rcv_n_repeats,
trainControl.classProbs = glb_is_classification,
trainControl.summaryFunction = glbMdlMetricSummaryFn,
train.metric = glbMdlMetricSummary,
train.maximize = glbMdlMetricMaximize,
train.method = "glmnet")),
indep_vars = indepVars,
rsp_var = glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
}
# Max.cor.Y + datetime lag terms: runs only when datetime features exist AND
# upstream generated <feat>.last<N> lag columns. Regex built once with
# paste0 (replacing paste(..., sep = "")) and shared by guard and grep.
dtLagRegex <- paste0(names(glbFeatsDateTime), "\\.last[[:digit:]]")
if ((length(glbFeatsDateTime) > 0) &&
(sum(grepl(dtLagRegex, names(glbObsAll))) > 0)) {
fit.models_0_chunk_df <- myadd_chunk(fit.models_0_chunk_df,
paste0("fit.models_0_", "Max.cor.Y.Time.Lag"), major.inc = FALSE,
label.minor = "glmnet")
indepVars <- c(max_cor_y_x_vars,
grep(dtLagRegex, names(glbObsAll), value = TRUE))
indepVars <- myadjust_interaction_feats(indepVars)
ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
id.prefix = "Max.cor.Y.Time.Lag",
type = glb_model_type,
tune.df = glbMdlTuneParams,
trainControl.method = "repeatedcv",
trainControl.number = glb_rcv_n_folds, trainControl.repeats = glb_rcv_n_repeats,
trainControl.classProbs = glb_is_classification,
trainControl.summaryFunction = glbMdlMetricSummaryFn,
train.metric = glbMdlMetricSummary,
train.maximize = glbMdlMetricMaximize,
train.method = "glmnet")),
indep_vars = indepVars,
rsp_var = glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
}
# Text-derived feature families: for each text feature, fit three glmnet
# models on Max.cor.Y plus (a) every text column EXCEPT the .T./.P. families,
# (b) only the .T. columns, (c) only the .P. columns. Column selection keys
# off the upper-cased first letter of the text-feature name; paste0 replaces
# paste(..., sep = "") throughout.
if (length(glbFeatsText) > 0) {
fit.models_0_chunk_df <- myadd_chunk(fit.models_0_chunk_df,
paste0("fit.models_0_", "Txt.*"), major.inc = FALSE,
label.minor = "glmnet")
# (a) non-T/P columns: PCRE negative lookahead excludes "<X>.T." / "<X>.P."
indepVars <- c(max_cor_y_x_vars)
for (txtFeat in names(glbFeatsText))
indepVars <- union(indepVars,
grep(paste0(str_to_upper(substr(txtFeat, 1, 1)), "\\.(?!([T|P]\\.))"),
names(glbObsAll), perl = TRUE, value = TRUE))
indepVars <- myadjust_interaction_feats(indepVars)
ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
id.prefix = "Max.cor.Y.Text.nonTP",
type = glb_model_type,
tune.df = glbMdlTuneParams,
trainControl.method = "repeatedcv",
trainControl.number = glb_rcv_n_folds, trainControl.repeats = glb_rcv_n_repeats,
trainControl.classProbs = glb_is_classification,
trainControl.summaryFunction = glbMdlMetricSummaryFn,
trainControl.allowParallel = glbMdlAllowParallel,
train.metric = glbMdlMetricSummary,
train.maximize = glbMdlMetricMaximize,
train.method = "glmnet")),
indep_vars = indepVars,
rsp_var = glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
# (b) only the "<X>.T." columns.
# NOTE(review): unlike the nonTP/onlyP specs, this one omits
# trainControl.allowParallel — confirm whether that is intentional.
indepVars <- c(max_cor_y_x_vars)
for (txtFeat in names(glbFeatsText))
indepVars <- union(indepVars,
grep(paste0(str_to_upper(substr(txtFeat, 1, 1)), "\\.T\\."),
names(glbObsAll), perl = TRUE, value = TRUE))
indepVars <- myadjust_interaction_feats(indepVars)
ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
id.prefix = "Max.cor.Y.Text.onlyT",
type = glb_model_type,
tune.df = glbMdlTuneParams,
trainControl.method = "repeatedcv",
trainControl.number = glb_rcv_n_folds, trainControl.repeats = glb_rcv_n_repeats,
trainControl.classProbs = glb_is_classification,
trainControl.summaryFunction = glbMdlMetricSummaryFn,
train.metric = glbMdlMetricSummary,
train.maximize = glbMdlMetricMaximize,
train.method = "glmnet")),
indep_vars = indepVars,
rsp_var = glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
# (c) only the "<X>.P." columns.
indepVars <- c(max_cor_y_x_vars)
for (txtFeat in names(glbFeatsText))
indepVars <- union(indepVars,
grep(paste0(str_to_upper(substr(txtFeat, 1, 1)), "\\.P\\."),
names(glbObsAll), perl = TRUE, value = TRUE))
indepVars <- myadjust_interaction_feats(indepVars)
ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
id.prefix = "Max.cor.Y.Text.onlyP",
type = glb_model_type,
tune.df = glbMdlTuneParams,
trainControl.method = "repeatedcv",
trainControl.number = glb_rcv_n_folds, trainControl.repeats = glb_rcv_n_repeats,
trainControl.classProbs = glb_is_classification,
trainControl.summaryFunction = glbMdlMetricSummaryFn,
trainControl.allowParallel = glbMdlAllowParallel,
train.metric = glbMdlMetricSummary,
train.maximize = glbMdlMetricMaximize,
train.method = "glmnet")),
indep_vars = indepVars,
rsp_var = glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
}
# Interactions.High.cor.Y
# Interaction model: cross the top-correlated feature (max_cor_y_x_vars[1])
# with every high-correlation feature that is not near-zero-variance.
# int_feats is assigned inside the condition: the block runs only when the
# candidate set is non-empty.
if (length(int_feats <- setdiff(setdiff(unique(glb_feats_df$cor.high.X), NA),
subset(glb_feats_df, nzv)$id)) > 0) {
fit.models_0_chunk_df <- myadd_chunk(fit.models_0_chunk_df,
paste0("fit.models_0_", "Interact.High.cor.Y"), major.inc = FALSE,
label.minor = "glmnet")
ret_lst <- myfit_mdl(mdl_specs_lst=myinit_mdl_specs_lst(mdl_specs_lst=list(
id.prefix="Interact.High.cor.Y",
type=glb_model_type, trainControl.method="repeatedcv",
trainControl.number=glb_rcv_n_folds, trainControl.repeats=glb_rcv_n_repeats,
trainControl.classProbs = glb_is_classification,
trainControl.summaryFunction = glbMdlMetricSummaryFn,
trainControl.allowParallel = glbMdlAllowParallel,
train.metric = glbMdlMetricSummary,
train.maximize = glbMdlMetricMaximize,
train.method="glmnet")),
indep_vars=c(max_cor_y_x_vars, paste(max_cor_y_x_vars[1], int_feats, sep=":")),
rsp_var=glb_rsp_var,
fit_df=glbObsFit, OOB_df=glbObsOOB)
}
## label step_major step_minor label_minor
## 4 fit.models_0_Max.cor.Y.rcv.*X* 1 3 glmnet
## 5 fit.models_0_Interact.High.cor.Y 1 4 glmnet
## bgn end elapsed
## 4 237.760 251.172 13.413
## 5 251.173 NA NA
## [1] "myfit_mdl: enter: 0.000000 secs"
## [1] "fitting model: Interact.High.cor.Y##rcv#glmnet"
## [1] " indep_vars: lumR.mean.mean,lumG.mad.mean.cut.fctr,lumR.mean.mean:lumG.mad.mean.cut.fctr,lumR.mean.mean:lumG.mean.mad,lumR.mean.mean:nImgs.cut.fctr,lumR.mean.mean:CosSmlBG.mad,lumR.mean.mean:resX.mad.log1p,lumR.mean.mean:CosSmlBG.mean,lumR.mean.mean:CosSmlGR.mean,lumR.mean.mean:nImgs.log1p,lumR.mean.mean:resY.mean.log1p,lumR.mean.mean:resY.mean.root2,lumR.mean.mean:CorRB.mad,lumR.mean.mean:resXY.mad,lumR.mean.mean:resXY.mad.nexp,lumR.mean.mean:resX.mean,lumR.mean.mean:resX.mean.nexp,lumR.mean.mean:lumG.mean.mean"
## [1] "myfit_mdl: setup complete: 0.766000 secs"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.55, lambda = 0.00141 on full training set
## [1] "myfit_mdl: train complete: 5.617000 secs"
## Length Class Mode
## a0 88 -none- numeric
## beta 2112 dgCMatrix S4
## df 88 -none- numeric
## dim 2 -none- numeric
## lambda 88 -none- numeric
## dev.ratio 88 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 24 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## 5.156039e+00
## lumG.mad.mean.cut.fctr(0.21,0.22]
## 3.509450e-01
## lumG.mad.mean.cut.fctr(0.22,0.23]
## 2.472727e-01
## lumG.mad.mean.cut.fctr(0.23,0.37]
## 9.393800e-01
## lumR.mean.mean:CorRB.mad
## 9.245071e+00
## lumR.mean.mean:CosSmlBG.mad
## -3.624838e+01
## lumR.mean.mean:CosSmlBG.mean
## -2.500568e+00
## lumR.mean.mean:CosSmlGR.mean
## -1.938818e+01
## lumG.mad.mean.cut.fctr(0.22,0.23]:lumR.mean.mean
## 2.104543e-01
## lumG.mad.mean.cut.fctr(0.23,0.37]:lumR.mean.mean
## -2.896494e-01
## lumR.mean.mean:lumG.mean.mad
## 1.358044e+01
## lumR.mean.mean:lumG.mean.mean
## 9.980560e+00
## lumR.mean.mean:nImgs.cut.fctr(32,60]
## 3.578149e-01
## lumR.mean.mean:nImgs.cut.fctr(60,120]
## 2.286712e-01
## lumR.mean.mean:nImgs.cut.fctr(120,3e+03]
## 6.717469e-01
## lumR.mean.mean:resX.mad.log1p
## 7.366543e-02
## lumR.mean.mean:resX.mean
## 6.139252e-03
## lumR.mean.mean:resX.mean.nexp
## -9.900000e+35
## lumR.mean.mean:resXY.mad.nexp
## 2.903776e-01
## lumR.mean.mean:resY.mean.root2
## 5.129627e-02
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## 5.147751e+00
## lumG.mad.mean.cut.fctr(0.21,0.22]
## 3.498924e-01
## lumG.mad.mean.cut.fctr(0.22,0.23]
## 2.683868e-01
## lumG.mad.mean.cut.fctr(0.23,0.37]
## 1.060074e+00
## lumR.mean.mean:CorRB.mad
## 9.137639e+00
## lumR.mean.mean:CosSmlBG.mad
## -3.708892e+01
## lumR.mean.mean:CosSmlBG.mean
## -2.191445e+00
## lumR.mean.mean:CosSmlGR.mean
## -2.022621e+01
## lumG.mad.mean.cut.fctr(0.22,0.23]:lumR.mean.mean
## 1.629414e-01
## lumG.mad.mean.cut.fctr(0.23,0.37]:lumR.mean.mean
## -5.327290e-01
## lumR.mean.mean:lumG.mean.mad
## 1.365728e+01
## lumR.mean.mean:lumG.mean.mean
## 1.020537e+01
## lumR.mean.mean:nImgs.cut.fctr(32,60]
## 3.723641e-01
## lumR.mean.mean:nImgs.cut.fctr(60,120]
## 2.505451e-01
## lumR.mean.mean:nImgs.cut.fctr(120,3e+03]
## 7.068802e-01
## lumR.mean.mean:nImgs.log1p
## -1.276785e-02
## lumR.mean.mean:resX.mad.log1p
## 7.476897e-02
## lumR.mean.mean:resX.mean
## 6.536950e-03
## lumR.mean.mean:resX.mean.nexp
## -9.900000e+35
## lumR.mean.mean:resXY.mad.nexp
## 2.878255e-01
## lumR.mean.mean:resY.mean.root2
## 6.565317e-02
## [1] "myfit_mdl: train diagnostics complete: 6.233000 secs"
## Prediction
## Reference N Y
## N 153 347
## Y 76 427
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.782652e-01 1.551573e-01 5.470122e-01 6.090575e-01 5.014955e-01
## AccuracyPValue McnemarPValue
## 6.465207e-07 2.280416e-39
## Prediction
## Reference N Y
## N 56 441
## Y 22 478
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.356068e-01 6.885019e-02 5.040813e-01 5.669215e-01 5.015045e-01
## AccuracyPValue McnemarPValue
## 1.689141e-02 4.642630e-84
## [1] "myfit_mdl: predict complete: 10.358000 secs"
## id
## 1 Interact.High.cor.Y##rcv#glmnet
## feats
## 1 lumR.mean.mean,lumG.mad.mean.cut.fctr,lumR.mean.mean:lumG.mad.mean.cut.fctr,lumR.mean.mean:lumG.mean.mad,lumR.mean.mean:nImgs.cut.fctr,lumR.mean.mean:CosSmlBG.mad,lumR.mean.mean:resX.mad.log1p,lumR.mean.mean:CosSmlBG.mean,lumR.mean.mean:CosSmlGR.mean,lumR.mean.mean:nImgs.log1p,lumR.mean.mean:resY.mean.log1p,lumR.mean.mean:resY.mean.root2,lumR.mean.mean:CorRB.mad,lumR.mean.mean:resXY.mad,lumR.mean.mean:resXY.mad.nexp,lumR.mean.mean:resX.mean,lumR.mean.mean:resX.mean.nexp,lumR.mean.mean:lumG.mean.mean
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 4.84 0.303
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.6310795 0.622 0.640159 0.666839
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.6687549 0.5998532
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.5470122 0.6090575 0.1996452
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5736479 0.5492958 0.598 0.617505
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.3 0.6737139 0.5356068
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.5040813 0.5669215 0.06885019
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.01928452 0.03869049
## [1] "myfit_mdl: exit: 10.372000 secs"
# Low.cor.X
# Start the bookkeeping chunk for the low-collinearity feature-set model.
fit.models_0_chunk_df <- myadd_chunk(fit.models_0_chunk_df,
paste0("fit.models_0_", "Low.cor.X"), major.inc = FALSE,
label.minor = "glmnet")
## label step_major step_minor label_minor
## 5 fit.models_0_Interact.High.cor.Y 1 4 glmnet
## 6 fit.models_0_Low.cor.X 1 5 glmnet
## bgn end elapsed
## 5 251.173 261.59 10.417
## 6 261.590 NA NA
# Low.cor.X feature set: keep features with no high-correlation partner
# (cor.high.X is NA), not near-zero-variance, and not excluded as features.
indep_vars <- subset(glb_feats_df, is.na(cor.high.X) & !nzv &
(exclude.as.feat != 1))[, "id"]
indep_vars <- myadjust_interaction_feats(indep_vars)
ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
id.prefix = "Low.cor.X",
type = glb_model_type,
tune.df = glbMdlTuneParams,
trainControl.method = "repeatedcv",
trainControl.number = glb_rcv_n_folds, trainControl.repeats = glb_rcv_n_repeats,
trainControl.classProbs = glb_is_classification,
trainControl.summaryFunction = glbMdlMetricSummaryFn,
trainControl.allowParallel = glbMdlAllowParallel,
train.metric = glbMdlMetricSummary,
train.maximize = glbMdlMetricMaximize,
train.method = "glmnet")),
indep_vars = indep_vars, rsp_var = glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
## [1] "myfit_mdl: enter: 0.001000 secs"
## [1] "fitting model: Low.cor.X##rcv#glmnet"
## [1] " indep_vars: lumG.mad.mean.cut.fctr,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,.pos,resX.mad.log1p,lumB.mad.mean,lumR.mean.mad,resXY.mad.nexp,lumR.mad.mad,resY.mean.log1p,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,resXY.mad,nImgs,resX.mean,resX.mean.nexp,CosSmlBG.mad,lumR.mean.mean"
## [1] "myfit_mdl: setup complete: 0.772000 secs"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.775, lambda = 0.0304 on full training set
## [1] "myfit_mdl: train complete: 5.565000 secs"
## Warning in myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst
## = list(id.prefix = "Low.cor.X", : model's bestTune found at an extreme of
## tuneGrid for parameter: lambda
## Length Class Mode
## a0 79 -none- numeric
## beta 3081 dgCMatrix S4
## df 79 -none- numeric
## dim 2 -none- numeric
## lambda 79 -none- numeric
## dev.ratio 79 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 39 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept) CorGR.mad
## 1.4895297 0.2128596
## lumG.mad.mad lumG.mad.mean.cut.fctr(0.23,0.37]
## 2.4650275 0.3235535
## lumG.mean.mad lumG.mean.mean
## 1.5364129 0.3330138
## lumR.mean.mean
## -4.3847496
## [1] "max lambda < lambdaOpt:"
## (Intercept) CorGR.mad
## 1.5061306 0.5664129
## lumG.mad.mad lumG.mad.mean.cut.fctr(0.23,0.37]
## 2.6658442 0.3414774
## lumG.mean.mad lumG.mean.mean
## 1.7938275 0.7033023
## lumR.mean.mean
## -4.8684213
## [1] "myfit_mdl: train diagnostics complete: 6.215000 secs"
## Prediction
## Reference N Y
## N 46 454
## Y 21 482
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.264207e-01 5.038061e-02 4.949830e-01 5.577030e-01 5.014955e-01
## AccuracyPValue McnemarPValue
## 6.088015e-02 1.941573e-87
## Prediction
## Reference N Y
## N 46 451
## Y 19 481
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.285858e-01 5.469796e-02 4.970530e-01 5.599492e-01 5.015045e-01
## AccuracyPValue McnemarPValue
## 4.659090e-02 5.997578e-88
## [1] "myfit_mdl: predict complete: 10.848000 secs"
## id
## 1 Low.cor.X##rcv#glmnet
## feats
## 1 lumG.mad.mean.cut.fctr,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,.pos,resX.mad.log1p,lumB.mad.mean,lumR.mean.mad,resXY.mad.nexp,lumR.mad.mad,resY.mean.log1p,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,resXY.mad,nImgs,resX.mean,resX.mean.nexp,CosSmlBG.mad,lumR.mean.mean
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 4.779 0.27
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.607175 0.606 0.6083499 0.6433757
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.6699097 0.5955435
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.494983 0.557703 0.1910628
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5737143 0.5714286 0.576 0.5990704
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.4 0.6717877 0.5285858
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.497053 0.5599492 0.05469796
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.02538828 0.05085341
## [1] "myfit_mdl: exit: 10.863000 secs"
# Close out the fit.models_0 pass of model fits.
fit.models_0_chunk_df <-
myadd_chunk(fit.models_0_chunk_df, "fit.models_0_end", major.inc = FALSE,
label.minor = "teardown")
## label step_major step_minor label_minor bgn end
## 6 fit.models_0_Low.cor.X 1 5 glmnet 261.590 272.502
## 7 fit.models_0_end 1 6 teardown 272.503 NA
## elapsed
## 6 10.912
## 7 NA
# Drop the last myfit_mdl() result and advance the top-level chunk log.
rm(ret_lst)
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models", major.inc = FALSE)
## label step_major step_minor label_minor bgn end elapsed
## 16 fit.models 8 0 0 228.228 272.516 44.288
## 17 fit.models 8 1 1 272.517 NA NA
# Begin a fresh chunk log for the fit.models_1 pass (model-family loop below).
fit.models_1_chunk_df <- myadd_chunk(NULL, "fit.models_1_bgn", label.minor = "setup")
## label step_major step_minor label_minor bgn end elapsed
## 1 fit.models_1_bgn 1 0 setup 276.687 NA NA
# refactor code for outliers / ensure all model runs exclude outliers in this chunk ???
#stop(here"); glb2Sav(); all.equal(glb_models_df, sav_models_df)
# Phase-1 model fitting: loop over every model family declared in
# glbMdlFamilies, resolve the family's predictor set, then fit one model per
# method in the family.  Helpers prefixed "my" (myadd_chunk, myfit_mdl,
# myget_feats_importance, myextract_actual_feats, myadjust_interaction_feats,
# myinit_mdl_specs_lst) are project utilities sourced at the top of the file.
# topindep_var / interact_vars are computed once (on the first ".Interact"
# family encountered) and reused by later iterations.
topindep_var <- NULL; interact_vars <- NULL;
for (mdl_id_pfx in names(glbMdlFamilies)) {
# Record a "setup" timing step for this model family.
fit.models_1_chunk_df <-
myadd_chunk(fit.models_1_chunk_df, paste0("fit.models_1_", mdl_id_pfx),
major.inc = FALSE, label.minor = "setup")
indep_vars <- NULL;
# ".Interact" families derive their predictor set from the best glmnet model
# so far: the top important feature crossed with the other important features.
if (grepl("\\.Interact", mdl_id_pfx)) {
if (is.null(topindep_var) && is.null(interact_vars)) {
# select best glmnet model upto now
# NOTE: model_sel_frmla is assigned as a side effect inside the orderBy() call.
dsp_models_df <- orderBy(model_sel_frmla <- get_model_sel_frmla(),
glb_models_df)
dsp_models_df <- subset(dsp_models_df,
grepl(".glmnet", id, fixed = TRUE))
bst_mdl_id <- dsp_models_df$id[1]
# Rename this family after the base model (e.g. "All.X" -> "All.X.Interact");
# this reassignment of the loop variable persists for the rest of the iteration.
mdl_id_pfx <-
paste(c(head(unlist(strsplit(bst_mdl_id, "[.]")), -1), "Interact"),
collapse=".")
# select important features
if (is.null(bst_featsimp_df <-
myget_feats_importance(glb_models_lst[[bst_mdl_id]]))) {
warning("Base model for RFE.Interact: ", bst_mdl_id,
" has no important features")
next
}
# Walk down the importance ranking until a feature usable as a standalone
# top feature (i.e. not listed in glbFeatsInteractionOnly) is found;
# dummy-variable names are collapsed back to their parent ".fctr" feature.
topindep_ix <- 1
while (is.null(topindep_var) && (topindep_ix <= nrow(bst_featsimp_df))) {
topindep_var <- row.names(bst_featsimp_df)[topindep_ix]
if (grepl(".fctr", topindep_var, fixed=TRUE))
topindep_var <-
paste0(unlist(strsplit(topindep_var, ".fctr"))[1], ".fctr")
if (topindep_var %in% names(glbFeatsInteractionOnly)) {
topindep_var <- NULL; topindep_ix <- topindep_ix + 1
} else break
}
# select features with importance > max(10, importance of .rnorm) & is not highest
# combine factor dummy features to just the factor feature
# .rnorm is a pure-noise benchmark column; features less important than it
# (or below 10) are dropped from the interaction candidates.
if (length(pos_rnorm <-
grep(".rnorm", row.names(bst_featsimp_df), fixed=TRUE)) > 0)
imp_rnorm <- bst_featsimp_df[pos_rnorm, 1] else
imp_rnorm <- NA
imp_cutoff <- max(10, imp_rnorm, na.rm=TRUE)
interact_vars <-
tail(row.names(subset(bst_featsimp_df,
imp > imp_cutoff)), -1)
if (length(interact_vars) > 0) {
interact_vars <-
myadjust_interaction_feats(myextract_actual_feats(interact_vars))
interact_vars <-
interact_vars[!grepl(topindep_var, interact_vars, fixed=TRUE)]
}
### bid0_sp only
# interact_vars <- c(
# "biddable", "D.ratio.sum.TfIdf.wrds.n", "D.TfIdf.sum.stem.stop.Ratio", "D.sum.TfIdf",
# "D.TfIdf.sum.post.stop", "D.TfIdf.sum.post.stem", "D.ratio.wrds.stop.n.wrds.n", "D.chrs.uppr.n.log",
# "D.chrs.n.log", "color.fctr"
# # , "condition.fctr", "prdl.my.descr.fctr"
# )
# interact_vars <- setdiff(interact_vars, c("startprice.dgt2.is9", "color.fctr"))
###
# Final predictor set: all important features minus the top one, plus formula
# interaction terms "top*other" for each surviving interaction feature.
indep_vars <- myextract_actual_feats(row.names(bst_featsimp_df))
indep_vars <- setdiff(indep_vars, topindep_var)
if (length(interact_vars) > 0) {
indep_vars <-
setdiff(indep_vars, myextract_actual_feats(interact_vars))
indep_vars <- c(indep_vars,
paste(topindep_var, setdiff(interact_vars, topindep_var),
sep = "*"))
} else indep_vars <- union(indep_vars, topindep_var)
}
}
# Fallback predictor sources, tried in priority order: explicit per-family
# list, RFE-selected predictors (RFE.* families only), then every feature
# that is neither near-zero-variance nor flagged exclude.as.feat.
if (is.null(indep_vars))
indep_vars <- glb_mdl_feats_lst[[mdl_id_pfx]]
if (is.null(indep_vars) && grepl("RFE\\.", mdl_id_pfx))
indep_vars <- myextract_actual_feats(predictors(rfe_fit_results))
if (is.null(indep_vars))
indep_vars <- subset(glb_feats_df, !nzv & (exclude.as.feat != 1))[, "id"]
# A single entry prefixed "%<d-%" is a deferred expression: evaluate the text
# after the marker to produce the actual predictor vector.
if ((length(indep_vars) == 1) && (grepl("^%<d-%", indep_vars))) {
indep_vars <-
eval(parse(text = str_trim(unlist(strsplit(indep_vars, "%<d-%"))[2])))
}
indep_vars <- myadjust_interaction_feats(indep_vars)
# Interact families without an explicit method list inherit the methods of
# the "Best.Interact" family, when one is declared.
if (grepl("\\.Interact", mdl_id_pfx)) {
# if (method != tail(unlist(strsplit(bst_mdl_id, "[.]")), 1)) next
if (is.null(glbMdlFamilies[[mdl_id_pfx]])) {
if (!is.null(glbMdlFamilies[["Best.Interact"]]))
glbMdlFamilies[[mdl_id_pfx]] <-
glbMdlFamilies[["Best.Interact"]]
}
}
# Optionally drop per-family outlier observations from the fit set.
if (!is.null(glbObsFitOutliers[[mdl_id_pfx]])) {
fitobs_df <- glbObsFit[!(glbObsFit[, glbFeatsId] %in%
glbObsFitOutliers[[mdl_id_pfx]]), ]
print(sprintf("Outliers removed: %d", nrow(glbObsFit) - nrow(fitobs_df)))
print(setdiff(glbObsFit[, glbFeatsId], fitobs_df[, glbFeatsId]))
} else fitobs_df <- glbObsFit
# A NULL family value means "fit every method in glbMdlMethods".
if (is.null(glbMdlFamilies[[mdl_id_pfx]]))
mdl_methods <- glbMdlMethods else
mdl_methods <- glbMdlFamilies[[mdl_id_pfx]]
for (method in mdl_methods) {
if (method %in% c("rpart", "rf")) {
# rpart: fubar's the tree
# rf: skip the scenario w/ .rnorm for speed
# NOTE(review): this removal persists for any methods fitted after
# rpart/rf within the same family — confirm that is intended.
indep_vars <- setdiff(indep_vars, c(".rnorm"))
#mdl_id <- paste0(mdl_id_pfx, ".no.rnorm")
}
# Record a per-method timing step, then fit via repeated cross-validation
# with the project's standard tuning/metric settings.
fit.models_1_chunk_df <- myadd_chunk(fit.models_1_chunk_df,
paste0("fit.models_1_", mdl_id_pfx), major.inc = FALSE,
label.minor = method)
ret_lst <-
myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
id.prefix = mdl_id_pfx,
type = glb_model_type,
tune.df = glbMdlTuneParams,
trainControl.method = "repeatedcv", # or "none" if nominalWorkflow is crashing
trainControl.number = glb_rcv_n_folds,
trainControl.repeats = glb_rcv_n_repeats,
trainControl.classProbs = glb_is_classification,
trainControl.summaryFunction = glbMdlMetricSummaryFn,
trainControl.allowParallel = glbMdlAllowParallel,
train.metric = glbMdlMetricSummary,
train.maximize = glbMdlMetricMaximize,
train.method = method)),
indep_vars = indep_vars, rsp_var = glb_rsp_var,
fit_df = fitobs_df, OOB_df = glbObsOOB)
# ntv_mdl <- glmnet(x = as.matrix(
# fitobs_df[, indep_vars]),
# y = as.factor(as.character(
# fitobs_df[, glb_rsp_var])),
# family = "multinomial")
# bgn = 1; end = 100;
# ntv_mdl <- glmnet(x = as.matrix(
# subset(fitobs_df, pop.fctr != "crypto")[bgn:end, indep_vars]),
# y = as.factor(as.character(
# subset(fitobs_df, pop.fctr != "crypto")[bgn:end, glb_rsp_var])),
# family = "multinomial")
}
}
## label step_major step_minor label_minor bgn end
## 1 fit.models_1_bgn 1 0 setup 276.687 276.698
## 2 fit.models_1_All.X 1 1 setup 276.698 NA
## elapsed
## 1 0.011
## 2 NA
## label step_major step_minor label_minor bgn end
## 2 fit.models_1_All.X 1 1 setup 276.698 276.705
## 3 fit.models_1_All.X 1 2 glmnet 276.706 NA
## elapsed
## 2 0.007
## 3 NA
## [1] "myfit_mdl: enter: 0.000000 secs"
## [1] "fitting model: All.X##rcv#glmnet"
## [1] " indep_vars: lumG.mad.mean.cut.fctr,lumG.mad.mean,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mean.mad,nImgs.log1p,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,CosSmlBG.mean,.pos,resX.mad.log1p,resX.mad.root2,CosSmlGR.mean,CosSmlRB.mean,resX.mad,lumB.mad.mean,CorBG.mean,lumR.mean.mad,resXY.mad.nexp,nImgs.root2,lumR.mad.mad,resY.mean.log1p,resY.mean.root2,resY.mean,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,CorRB.mean,resXY.mad.root2,resXY.mad,resX.mad.nexp,resXY.mad.log1p,nImgs,resX.mean.log1p,resX.mean.root2,resX.mean,resX.mean.nexp,resY.mean.nexp,lumB.mean.mean,CosSmlBG.mad,lumR.mean.mean"
## [1] "myfit_mdl: setup complete: 0.735000 secs"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.1, lambda = 0.00141 on full training set
## [1] "myfit_mdl: train complete: 10.474000 secs"
## Warning in myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst
## = list(id.prefix = mdl_id_pfx, : model's bestTune found at an extreme of
## tuneGrid for parameter: alpha
## Length Class Mode
## a0 100 -none- numeric
## beta 5900 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 59 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept) .pos
## -1.449891e+01 2.643456e-05
## CorBG.mad CorBG.mean
## 1.720048e+01 6.533482e+00
## CorGR.mad CorGR.mean
## -6.264303e-01 -1.131432e+00
## CorRB.mad CorRB.mean
## 1.954783e+00 8.972890e-01
## CosSmlBG.mad CosSmlBG.mean
## -8.264686e+01 -2.620369e+01
## CosSmlGR.mad CosSmlGR.mean
## 6.276494e+00 -3.670034e+00
## CosSmlRB.mad CosSmlRB.mean
## 7.696698e+00 -4.111970e+00
## lumB.mad.mad lumB.mad.mean
## -2.546541e+00 -1.559111e+01
## lumB.mean.mad lumB.mean.mean
## -5.670072e+00 1.981223e+00
## lumG.mad.mad lumG.mad.mean
## 6.923609e+00 1.453262e+01
## lumG.mad.mean.cut.fctr(0.21,0.22] lumG.mad.mean.cut.fctr(0.22,0.23]
## 9.798029e-02 6.791072e-02
## lumG.mad.mean.cut.fctr(0.23,0.37] lumG.mean.mad
## 3.430964e-01 8.539465e+00
## lumG.mean.mean lumR.mad.mad
## 2.013598e+00 -3.305769e+00
## lumR.mad.mean lumR.mean.mad
## 7.334658e+00 1.463017e+00
## lumR.mean.mean nImgs
## -1.167878e+01 -8.504570e-04
## nImgs.cut.fctr(32,60] nImgs.cut.fctr(60,120]
## 3.735109e-02 -1.917323e-01
## nImgs.cut.fctr(120,3e+03] nImgs.log1p
## -1.918515e-02 4.070804e-01
## nImgs.nexp nImgs.root2
## 2.208673e+01 -2.659685e-02
## resX.mad resX.mad.log1p
## -5.377572e-03 2.224490e-01
## resX.mad.nexp resX.mad.root2
## 4.085039e-01 1.156796e-02
## resX.mean resX.mean.log1p
## -2.197819e-05 3.836671e+00
## resX.mean.nexp resXY.mad
## -9.900000e+35 -2.139297e-05
## resXY.mad.log1p resXY.mad.nexp
## 1.112064e-02 5.126033e-01
## resXY.mad.root2 resXY.mean
## 6.778941e-03 -2.783972e-05
## resXY.mean.log1p resXY.mean.root2
## 1.388996e+00 -1.420357e-05
## resY.mad resY.mad.log1p
## -9.871835e-05 1.166314e-02
## resY.mad.nexp resY.mean
## 5.563463e-02 6.279220e-03
## resY.mean.log1p resY.mean.nexp
## 8.937997e-02 -9.900000e+35
## resY.mean.root2
## 1.428268e-01
## [1] "max lambda < lambdaOpt:"
## (Intercept) .pos
## -1.766701e+01 2.665889e-05
## CorBG.mad CorBG.mean
## 1.742576e+01 6.902726e+00
## CorGR.mad CorGR.mean
## -6.735344e-01 -1.240569e+00
## CorRB.mad CorRB.mean
## 1.949542e+00 8.784349e-01
## CosSmlBG.mad CosSmlBG.mean
## -8.329374e+01 -2.699752e+01
## CosSmlGR.mad CosSmlGR.mean
## 6.306698e+00 -3.525151e+00
## CosSmlRB.mad CosSmlRB.mean
## 7.817653e+00 -4.029393e+00
## lumB.mad.mad lumB.mad.mean
## -2.520761e+00 -1.581341e+01
## lumB.mean.mad lumB.mean.mean
## -5.742412e+00 2.133768e+00
## lumG.mad.mad lumG.mad.mean
## 6.943085e+00 1.469989e+01
## lumG.mad.mean.cut.fctr(0.21,0.22] lumG.mad.mean.cut.fctr(0.22,0.23]
## 9.642240e-02 6.698197e-02
## lumG.mad.mean.cut.fctr(0.23,0.37] lumG.mean.mad
## 3.408218e-01 8.580246e+00
## lumG.mean.mean lumR.mad.mad
## 1.911026e+00 -3.354100e+00
## lumR.mad.mean lumR.mean.mad
## 7.462387e+00 1.489701e+00
## lumR.mean.mean nImgs
## -1.175617e+01 -8.254572e-04
## nImgs.cut.fctr(32,60] nImgs.cut.fctr(60,120]
## 3.124532e-02 -2.013657e-01
## nImgs.cut.fctr(120,3e+03] nImgs.log1p
## -3.071014e-02 4.224753e-01
## nImgs.nexp nImgs.root2
## 2.269017e+01 -2.932907e-02
## resX.mad resX.mad.log1p
## -5.536788e-03 2.299891e-01
## resX.mad.nexp resX.mad.root2
## 4.280276e-01 1.175269e-02
## resX.mean resX.mean.log1p
## -3.772421e-04 3.963698e+00
## resX.mean.nexp resXY.mad
## -9.900000e+35 -2.193618e-05
## resXY.mad.log1p resXY.mad.nexp
## 1.273738e-02 5.303176e-01
## resXY.mad.root2 resXY.mean
## 6.912089e-03 -2.953664e-05
## resXY.mean.log1p resXY.mean.root2
## 1.683664e+00 -1.491646e-05
## resY.mad resY.mad.log1p
## -1.289518e-04 1.332490e-02
## resY.mad.nexp resY.mean
## 6.073816e-02 6.557304e-03
## resY.mean.nexp resY.mean.root2
## -9.900000e+35 1.408124e-01
## [1] "myfit_mdl: train diagnostics complete: 11.197000 secs"
## Prediction
## Reference N Y
## N 187 313
## Y 78 425
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 6.101695e-01 2.192377e-01 5.792006e-01 6.404897e-01 5.014955e-01
## AccuracyPValue McnemarPValue
## 2.992693e-12 2.607746e-32
## Prediction
## Reference N Y
## N 26 471
## Y 9 491
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.185557e-01 3.440984e-02 4.870233e-01 5.499782e-01 5.015045e-01
## AccuracyPValue McnemarPValue
## 1.479800e-01 2.725781e-98
## [1] "myfit_mdl: predict complete: 15.916000 secs"
## id
## 1 All.X##rcv#glmnet
## feats
## 1 lumG.mad.mean.cut.fctr,lumG.mad.mean,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mean.mad,nImgs.log1p,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,CosSmlBG.mean,.pos,resX.mad.log1p,resX.mad.root2,CosSmlGR.mean,CosSmlRB.mean,resX.mad,lumB.mad.mean,CorBG.mean,lumR.mean.mad,resXY.mad.nexp,nImgs.root2,lumR.mad.mad,resY.mean.log1p,resY.mean.root2,resY.mean,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,CorRB.mean,resXY.mad.root2,resXY.mad,resX.mad.nexp,resXY.mad.log1p,nImgs,resX.mean.log1p,resX.mean.root2,resX.mean,resX.mean.nexp,resY.mean.nexp,lumB.mean.mean,CosSmlBG.mad,lumR.mean.mean
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 9.718 0.519
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.637996 0.608 0.667992 0.7005805
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.6849315 0.6001859
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.5792006 0.6404897 0.2003224
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5886781 0.5593561 0.618 0.6040644
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.2 0.6716826 0.5185557
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.4870233 0.5499782 0.03440984
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.02935885 0.05876145
## [1] "myfit_mdl: exit: 15.931000 secs"
## label step_major step_minor label_minor bgn end
## 3 fit.models_1_All.X 1 2 glmnet 276.706 292.642
## 4 fit.models_1_All.X 1 3 glm 292.643 NA
## elapsed
## 3 15.936
## 4 NA
## [1] "myfit_mdl: enter: 0.000000 secs"
## [1] "fitting model: All.X##rcv#glm"
## [1] " indep_vars: lumG.mad.mean.cut.fctr,lumG.mad.mean,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mean.mad,nImgs.log1p,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,CosSmlBG.mean,.pos,resX.mad.log1p,resX.mad.root2,CosSmlGR.mean,CosSmlRB.mean,resX.mad,lumB.mad.mean,CorBG.mean,lumR.mean.mad,resXY.mad.nexp,nImgs.root2,lumR.mad.mad,resY.mean.log1p,resY.mean.root2,resY.mean,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,CorRB.mean,resXY.mad.root2,resXY.mad,resX.mad.nexp,resXY.mad.log1p,nImgs,resX.mean.log1p,resX.mean.root2,resX.mean,resX.mean.nexp,resY.mean.nexp,lumB.mean.mean,CosSmlBG.mad,lumR.mean.mean"
## [1] "myfit_mdl: setup complete: 0.732000 secs"
## Aggregating results
## Fitting final model on full training set
## [1] "myfit_mdl: train complete: 2.432000 secs"
## Warning: not plotting observations with leverage one:
## 523
## Warning: not plotting observations with leverage one:
## 523
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0981 -1.0928 0.3074 1.0532 2.1599
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error z value
## (Intercept) 1.134e+04 1.067e+04 1.062
## .pos 3.755e-05 1.208e-04 0.311
## .rnorm 3.848e-03 6.864e-02 0.056
## CorBG.mad 2.108e+01 1.008e+01 2.090
## CorBG.mean 1.407e+01 1.480e+01 0.951
## CorGR.mad -1.587e+00 3.918e+00 -0.405
## CorGR.mean -3.514e+00 7.007e+00 -0.502
## CorRB.mad 1.619e+00 7.726e+00 0.210
## CorRB.mean -1.052e-01 6.808e+00 -0.015
## CosSmlBG.mad -9.134e+01 2.849e+01 -3.206
## CosSmlBG.mean -4.314e+01 3.524e+01 -1.224
## CosSmlGR.mad 1.489e+01 1.137e+01 1.309
## CosSmlGR.mean -1.668e+00 1.841e+01 -0.091
## CosSmlRB.mad NA NA NA
## CosSmlRB.mean NA NA NA
## lumB.mad.mad -1.981e+00 4.456e+00 -0.445
## lumB.mad.mean -1.831e+01 8.110e+00 -2.258
## lumB.mean.mad -7.044e+00 4.472e+00 -1.575
## lumB.mean.mean 4.732e+00 7.529e+00 0.629
## lumG.mad.mad 7.591e+00 4.272e+00 1.777
## lumG.mad.mean 1.669e+01 7.279e+00 2.294
## `lumG.mad.mean.cut.fctr(0.21,0.22]` 4.904e-02 2.766e-01 0.177
## `lumG.mad.mean.cut.fctr(0.22,0.23]` 3.305e-02 2.892e-01 0.114
## `lumG.mad.mean.cut.fctr(0.23,0.37]` 2.823e-01 3.312e-01 0.853
## lumG.mean.mad 9.316e+00 3.566e+00 2.613
## lumG.mean.mean 3.097e-01 5.550e+00 0.056
## lumR.mad.mad -4.221e+00 4.102e+00 -1.029
## lumR.mad.mean 9.180e+00 5.545e+00 1.655
## lumR.mean.mad 1.923e+00 3.246e+00 0.592
## lumR.mean.mean -1.307e+01 4.261e+00 -3.068
## nImgs 4.675e-04 2.265e-03 0.206
## `nImgs.cut.fctr(32,60]` -1.841e-01 3.253e-01 -0.566
## `nImgs.cut.fctr(60,120]` -5.029e-01 4.485e-01 -1.121
## `nImgs.cut.fctr(120,3e+03]` -3.243e-01 5.991e-01 -0.541
## nImgs.log1p 8.940e-01 6.344e-01 1.409
## nImgs.nexp 3.836e+01 6.475e+01 0.592
## nImgs.root2 -1.372e-01 1.624e-01 -0.845
## resX.mad 3.907e-02 4.397e-02 0.889
## resX.mad.log1p 2.207e+00 1.778e+00 1.241
## resX.mad.nexp 1.217e+00 8.575e-01 1.419
## resX.mad.root2 -1.271e+00 1.206e+00 -1.054
## resX.mean -9.159e+00 7.469e+00 -1.226
## resX.mean.log1p -3.807e+03 3.187e+03 -1.194
## resX.mean.nexp -2.728e+124 5.644e+125 -0.048
## resX.mean.root2 7.466e+02 6.166e+02 1.211
## resXY.mad -1.516e-06 8.113e-05 -0.019
## resXY.mad.log1p 3.893e-01 1.060e+00 0.367
## resXY.mad.nexp 2.803e+00 6.256e+00 0.448
## resXY.mad.root2 -5.573e-03 4.132e-02 -0.135
## resXY.mean -1.490e-03 5.629e-03 -0.265
## resXY.mean.log1p -1.025e+02 9.649e+02 -0.106
## resXY.mean.root2 1.745e+00 9.320e+00 0.187
## resY.mad -2.936e-03 3.467e-02 -0.085
## resY.mad.log1p -1.342e-01 1.481e+00 -0.091
## resY.mad.nexp 8.697e-03 7.644e-01 0.011
## resY.mad.root2 9.278e-02 9.801e-01 0.095
## resY.mean 1.309e+00 4.052e+00 0.323
## resY.mean.log1p 3.853e+02 1.668e+03 0.231
## resY.mean.nexp NA NA NA
## resY.mean.root2 -9.103e+01 3.284e+02 -0.277
## Pr(>|z|)
## (Intercept) 0.28808
## .pos 0.75596
## .rnorm 0.95530
## CorBG.mad 0.03661 *
## CorBG.mean 0.34169
## CorGR.mad 0.68552
## CorGR.mean 0.61601
## CorRB.mad 0.83397
## CorRB.mean 0.98766
## CosSmlBG.mad 0.00134 **
## CosSmlBG.mean 0.22093
## CosSmlGR.mad 0.19038
## CosSmlGR.mean 0.92781
## CosSmlRB.mad NA
## CosSmlRB.mean NA
## lumB.mad.mad 0.65667
## lumB.mad.mean 0.02398 *
## lumB.mean.mad 0.11525
## lumB.mean.mean 0.52966
## lumG.mad.mad 0.07557 .
## lumG.mad.mean 0.02181 *
## `lumG.mad.mean.cut.fctr(0.21,0.22]` 0.85928
## `lumG.mad.mean.cut.fctr(0.22,0.23]` 0.90900
## `lumG.mad.mean.cut.fctr(0.23,0.37]` 0.39393
## lumG.mean.mad 0.00898 **
## lumG.mean.mean 0.95549
## lumR.mad.mad 0.30344
## lumR.mad.mean 0.09783 .
## lumR.mean.mad 0.55362
## lumR.mean.mean 0.00216 **
## nImgs 0.83646
## `nImgs.cut.fctr(32,60]` 0.57152
## `nImgs.cut.fctr(60,120]` 0.26212
## `nImgs.cut.fctr(120,3e+03]` 0.58823
## nImgs.log1p 0.15880
## nImgs.nexp 0.55362
## nImgs.root2 0.39810
## resX.mad 0.37424
## resX.mad.log1p 0.21454
## resX.mad.nexp 0.15595
## resX.mad.root2 0.29201
## resX.mean 0.22009
## resX.mean.log1p 0.23234
## resX.mean.nexp 0.96144
## resX.mean.root2 0.22593
## resXY.mad 0.98509
## resXY.mad.log1p 0.71334
## resXY.mad.nexp 0.65408
## resXY.mad.root2 0.89270
## resXY.mean 0.79126
## resXY.mean.log1p 0.91539
## resXY.mean.root2 0.85148
## resY.mad 0.93251
## resY.mad.log1p 0.92779
## resY.mad.nexp 0.99092
## resY.mad.root2 0.92458
## resY.mean 0.74668
## resY.mean.log1p 0.81736
## resY.mean.nexp NA
## resY.mean.root2 0.78164
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1390.4 on 1002 degrees of freedom
## Residual deviance: 1264.4 on 946 degrees of freedom
## AIC: 1378.4
##
## Number of Fisher Scoring iterations: 11
##
## [1] "myfit_mdl: train diagnostics complete: 3.258000 secs"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Prediction
## Reference N Y
## N 193 307
## Y 80 423
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 6.141575e-01 2.272620e-01 5.832335e-01 6.444093e-01 5.014955e-01
## AccuracyPValue McnemarPValue
## 4.809358e-13 1.511853e-30
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Prediction
## Reference N Y
## N 0 497
## Y 0 500
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.015045e-01 0.000000e+00 4.700015e-01 5.329987e-01 5.015045e-01
## AccuracyPValue McnemarPValue
## 5.126442e-01 1.162632e-109
## [1] "myfit_mdl: predict complete: 7.969000 secs"
## id
## 1 All.X##rcv#glm
## feats
## 1 lumG.mad.mean.cut.fctr,lumG.mad.mean,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mean.mad,nImgs.log1p,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,CosSmlBG.mean,.pos,resX.mad.log1p,resX.mad.root2,CosSmlGR.mean,CosSmlRB.mean,resX.mad,lumB.mad.mean,CorBG.mean,lumR.mean.mad,resXY.mad.nexp,nImgs.root2,lumR.mad.mad,resY.mean.log1p,resY.mean.root2,resY.mean,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,CorRB.mean,resXY.mad.root2,resXY.mad,resX.mad.nexp,resXY.mad.log1p,nImgs,resX.mean.log1p,resX.mean.root2,resX.mean,resX.mean.nexp,resY.mean.nexp,lumB.mean.mean,CosSmlBG.mad,lumR.mean.mean
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 1 1.678 0.091
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.6499781 0.626 0.6739563 0.7059085
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.6861314 0.5875394
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.5832335 0.6444093 0.1749541
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5826358 0.5452716 0.62 0.6019115
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0 0.6680027 0.5015045
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.4700015 0.5329987 0
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.03042227 0.06088308
## [1] "myfit_mdl: exit: 7.984000 secs"
# Check whether alternative caret preProcess recipes improve the best model;
# first log the start of the "preProc" step in the phase-1 timing data frame.
fit.models_1_chunk_df <- myadd_chunk(fit.models_1_chunk_df,
                                     "fit.models_1_preProc",
                                     label.minor = "preProc",
                                     major.inc = FALSE)
## label step_major step_minor label_minor bgn end
## 4 fit.models_1_All.X 1 3 glm 292.643 300.665
## 5 fit.models_1_preProc 1 4 preProc 300.666 NA
## elapsed
## 4 8.022
## 5 NA
# Identify the best model so far per the selection formula (first row after
# ordering) and recover its method, id prefix, and predictor list so the same
# specification can be refit with different preProcess recipes below.
mdl_id <- orderBy(get_model_sel_frmla(), glb_models_df)[1, "id"]
# The "feats" column is a comma-separated string; split and strip whitespace.
# FIX: base trimws() replaces gdata::trim() — gdata is never attached (its
# require() is commented out further down), so trim() would error at runtime.
indep_vars_vctr <- trimws(unlist(strsplit(glb_models_df[glb_models_df$id == mdl_id,
                                                        "feats"], "[,]")))
# Model ids are dot-separated; the last token is the caret method name and the
# remainder is the model-family prefix.
method <- tail(unlist(strsplit(mdl_id, "[.]")), 1)
mdl_id_pfx <- paste0(head(unlist(strsplit(mdl_id, "[.]")), -1), collapse = ".")
# Drop this family's declared outlier observations from the fit set, if any.
if (!is.null(glbObsFitOutliers[[mdl_id_pfx]])) {
    fitobs_df <- glbObsFit[!(glbObsFit[, glbFeatsId] %in%
                                 glbObsFitOutliers[[mdl_id_pfx]]), ]
    print(sprintf("Outliers removed: %d", nrow(glbObsFit) - nrow(fitobs_df)))
    print(setdiff(glbObsFit[, glbFeatsId], fitobs_df[, glbFeatsId]))
} else fitobs_df <- glbObsFit
# Refit the selected method once per candidate preProcess recipe.  caret
# applies the requested operations in a fixed order: Box-Cox/Yeo-Johnson
# transformation, centering, scaling, range, imputation, PCA, ICA, then
# spatial sign.
for (preproc_method in glb_preproc_methods) {
    # Build the model specification, identical to the winning model except
    # for the added train.preProcess entry.
    preproc_specs <- list(
        id.prefix = mdl_id_pfx,
        type = glb_model_type,
        tune.df = glbMdlTuneParams,
        trainControl.method = "repeatedcv",
        trainControl.number = glb_rcv_n_folds,
        trainControl.repeats = glb_rcv_n_repeats,
        trainControl.classProbs = glb_is_classification,
        trainControl.summaryFunction = glbMdlMetricSummaryFn,
        train.metric = glbMdlMetricSummary,
        train.maximize = glbMdlMetricMaximize,
        train.method = method,
        train.preProcess = preproc_method
    )
    ret_lst <- myfit_mdl(
        mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = preproc_specs),
        indep_vars = indep_vars_vctr,
        rsp_var = glb_rsp_var,
        fit_df = fitobs_df,
        OOB_df = glbObsOOB
    )
}
# If (All|RFE).X.glm is less accurate than Low.Cor.X.glm
# check NA coefficients & filter appropriate terms in indep_vars_vctr
# if (method == "glm") {
# orig_glm <- glb_models_lst[[paste0(mdl_id, ".", model_method)]]$finalModel
# orig_glm <- glb_models_lst[["All.X.glm"]]$finalModel; print(summary(orig_glm))
# orig_glm <- glb_models_lst[["RFE.X.glm"]]$finalModel; print(summary(orig_glm))
# require(car)
# vif_orig_glm <- vif(orig_glm); print(vif_orig_glm)
# # if vif errors out with "there are aliased coefficients in the model"
# alias_orig_glm <- alias(orig_glm); alias_complete_orig_glm <- (alias_orig_glm$Complete > 0); alias_complete_orig_glm <- alias_complete_orig_glm[rowSums(alias_complete_orig_glm) > 0, colSums(alias_complete_orig_glm) > 0]; print(alias_complete_orig_glm)
# print(vif_orig_glm[!is.na(vif_orig_glm) & (vif_orig_glm == Inf)])
# print(which.max(vif_orig_glm))
# print(sort(vif_orig_glm[vif_orig_glm >= 1.0e+03], decreasing=TRUE))
# glbObsFit[c(1143, 3637, 3953, 4105), c("UniqueID", "Popular", "H.P.quandary", "Headline")]
# glb_feats_df[glb_feats_df$id %in% grep("[HSA]\\.chrs.n.log", glb_feats_df$id, value=TRUE) | glb_feats_df$cor.high.X %in% grep("[HSA]\\.chrs.n.log", glb_feats_df$id, value=TRUE), ]
# all.equal(glbObsAll$S.chrs.uppr.n.log, glbObsAll$A.chrs.uppr.n.log)
# cor(glbObsAll$S.T.herald, glbObsAll$S.T.tribun)
# mydspObs(Abstract.contains="[Dd]iar", cols=("Abstract"), all=TRUE)
# subset(glb_feats_df, cor.y.abs <= glb_feats_df[glb_feats_df$id == ".rnorm", "cor.y.abs"])
# corxx_mtrx <- cor(data.matrix(glbObsAll[, setdiff(names(glbObsAll), myfind_chr_cols_df(glbObsAll))]), use="pairwise.complete.obs"); abs_corxx_mtrx <- abs(corxx_mtrx); diag(abs_corxx_mtrx) <- 0
# which.max(abs_corxx_mtrx["S.T.tribun", ])
# abs_corxx_mtrx["A.npnct08.log", "S.npnct08.log"]
# step_glm <- step(orig_glm)
# }
# Since caret does not optimize rpart well
# if (method == "rpart")
# ret_lst <- myfit_mdl(mdl_id=paste0(mdl_id_pfx, ".cp.0"), model_method=method,
# indep_vars_vctr=indep_vars_vctr,
# model_type=glb_model_type,
# rsp_var=glb_rsp_var,
# fit_df=glbObsFit, OOB_df=glbObsOOB,
# n_cv_folds=0, tune_models_df=data.frame(parameter="cp", min=0.0, max=0.0, by=0.1))
# User specified
# Ensure at least 2 vars in each regression; else varImp crashes
# sav_models_lst <- glb_models_lst; sav_models_df <- glb_models_df; sav_featsimp_df <- glb_featsimp_df; all.equal(sav_featsimp_df, glb_featsimp_df)
# glb_models_lst <- sav_models_lst; glb_models_df <- sav_models_df; glm_featsimp_df <- sav_featsimp_df
# easier to exclude features
# require(gdata) # needed for trim
# mdl_id <- "";
# indep_vars_vctr <- head(subset(glb_models_df, grepl("All\\.X\\.", mdl_id), select=feats)
# , 1)[, "feats"]
# indep_vars_vctr <- trim(unlist(strsplit(indep_vars_vctr, "[,]")))
# indep_vars_vctr <- setdiff(indep_vars_vctr, ".rnorm")
# easier to include features
#stop(here"); sav_models_df <- glb_models_df; glb_models_df <- sav_models_df
# !_sp
# mdl_id <- "csm"; indep_vars_vctr <- c(NULL
# ,"prdline.my.fctr", "prdline.my.fctr:.clusterid.fctr"
# ,"prdline.my.fctr*biddable"
# #,"prdline.my.fctr*startprice.log"
# #,"prdline.my.fctr*startprice.diff"
# ,"prdline.my.fctr*condition.fctr"
# ,"prdline.my.fctr*D.terms.post.stop.n"
# #,"prdline.my.fctr*D.terms.post.stem.n"
# ,"prdline.my.fctr*cellular.fctr"
# # ,"<feat1>:<feat2>"
# )
# for (method in glbMdlMethods) {
# ret_lst <- myfit_mdl(mdl_id=mdl_id, model_method=method,
# indep_vars_vctr=indep_vars_vctr,
# model_type=glb_model_type,
# rsp_var=glb_rsp_var,
# fit_df=glbObsFit, OOB_df=glbObsOOB,
# n_cv_folds=glb_rcv_n_folds, tune_models_df=glbMdlTuneParams)
# csm_mdl_id <- paste0(mdl_id, ".", method)
# csm_featsimp_df <- myget_feats_importance(glb_models_lst[[paste0(mdl_id, ".",
# method)]]); print(head(csm_featsimp_df))
# }
###
# Ntv.1.lm <- lm(reformulate(indep_vars_vctr, glb_rsp_var), glbObsTrn); print(summary(Ntv.1.lm))
#glb_models_df[, "max.Accuracy.OOB", FALSE]
#varImp(glb_models_lst[["Low.cor.X.glm"]])
#orderBy(~ -Overall, varImp(glb_models_lst[["All.X.2.glm"]])$imp)
#orderBy(~ -Overall, varImp(glb_models_lst[["All.X.3.glm"]])$imp)
#glb_feats_df[grepl("npnct28", glb_feats_df$id), ]
# User specified bivariate models
# indep_vars_vctr_lst <- list()
# for (feat in setdiff(names(glbObsFit),
# union(glb_rsp_var, glbFeatsExclude)))
# indep_vars_vctr_lst[["feat"]] <- feat
# User specified combinatorial models
# indep_vars_vctr_lst <- list()
# combn_mtrx <- combn(c("<feat1_name>", "<feat2_name>", "<featn_name>"),
# <num_feats_to_choose>)
# for (combn_ix in 1:ncol(combn_mtrx))
# #print(combn_mtrx[, combn_ix])
# indep_vars_vctr_lst[[combn_ix]] <- combn_mtrx[, combn_ix]
# template for myfit_mdl
# rf is hard-coded in caret to recognize only Accuracy / Kappa evaluation metrics
# only for OOB in trainControl ?
# ret_lst <- myfit_mdl_fn(mdl_id=paste0(mdl_id_pfx, ""), model_method=method,
# indep_vars_vctr=indep_vars_vctr,
# rsp_var=glb_rsp_var,
# fit_df=glbObsFit, OOB_df=glbObsOOB,
# n_cv_folds=glb_rcv_n_folds, tune_models_df=glbMdlTuneParams,
# model_loss_mtrx=glbMdlMetric_terms,
# model_summaryFunction=glbMdlMetricSummaryFn,
# model_metric=glbMdlMetricSummary,
# model_metric_maximize=glbMdlMetricMaximize)
# Simplify a model
# fit_df <- glbObsFit; glb_mdl <- step(<complex>_mdl)
# Non-caret models
# rpart_area_mdl <- rpart(reformulate("Area", response=glb_rsp_var),
# data=glbObsFit, #method="class",
# control=rpart.control(cp=0.12),
# parms=list(loss=glbMdlMetric_terms))
# print("rpart_sel_wlm_mdl"); prp(rpart_sel_wlm_mdl)
#
# Display the consolidated model-comparison summary (one row per fitted model,
# with fit/OOB metrics) accumulated by the myfit_mdl calls above
print(glb_models_df)
## id
## MFO###myMFO_classfr MFO###myMFO_classfr
## Random###myrandom_classfr Random###myrandom_classfr
## Max.cor.Y.rcv.1X1###glmnet Max.cor.Y.rcv.1X1###glmnet
## Max.cor.Y##rcv#rpart Max.cor.Y##rcv#rpart
## Interact.High.cor.Y##rcv#glmnet Interact.High.cor.Y##rcv#glmnet
## Low.cor.X##rcv#glmnet Low.cor.X##rcv#glmnet
## All.X##rcv#glmnet All.X##rcv#glmnet
## All.X##rcv#glm All.X##rcv#glm
## feats
## MFO###myMFO_classfr .rnorm
## Random###myrandom_classfr .rnorm
## Max.cor.Y.rcv.1X1###glmnet lumR.mean.mean,lumG.mad.mean.cut.fctr
## Max.cor.Y##rcv#rpart lumR.mean.mean,lumG.mad.mean.cut.fctr
## Interact.High.cor.Y##rcv#glmnet lumR.mean.mean,lumG.mad.mean.cut.fctr,lumR.mean.mean:lumG.mad.mean.cut.fctr,lumR.mean.mean:lumG.mean.mad,lumR.mean.mean:nImgs.cut.fctr,lumR.mean.mean:CosSmlBG.mad,lumR.mean.mean:resX.mad.log1p,lumR.mean.mean:CosSmlBG.mean,lumR.mean.mean:CosSmlGR.mean,lumR.mean.mean:nImgs.log1p,lumR.mean.mean:resY.mean.log1p,lumR.mean.mean:resY.mean.root2,lumR.mean.mean:CorRB.mad,lumR.mean.mean:resXY.mad,lumR.mean.mean:resXY.mad.nexp,lumR.mean.mean:resX.mean,lumR.mean.mean:resX.mean.nexp,lumR.mean.mean:lumG.mean.mean
## Low.cor.X##rcv#glmnet lumG.mad.mean.cut.fctr,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,.pos,resX.mad.log1p,lumB.mad.mean,lumR.mean.mad,resXY.mad.nexp,lumR.mad.mad,resY.mean.log1p,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,resXY.mad,nImgs,resX.mean,resX.mean.nexp,CosSmlBG.mad,lumR.mean.mean
## All.X##rcv#glmnet lumG.mad.mean.cut.fctr,lumG.mad.mean,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mean.mad,nImgs.log1p,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,CosSmlBG.mean,.pos,resX.mad.log1p,resX.mad.root2,CosSmlGR.mean,CosSmlRB.mean,resX.mad,lumB.mad.mean,CorBG.mean,lumR.mean.mad,resXY.mad.nexp,nImgs.root2,lumR.mad.mad,resY.mean.log1p,resY.mean.root2,resY.mean,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,CorRB.mean,resXY.mad.root2,resXY.mad,resX.mad.nexp,resXY.mad.log1p,nImgs,resX.mean.log1p,resX.mean.root2,resX.mean,resX.mean.nexp,resY.mean.nexp,lumB.mean.mean,CosSmlBG.mad,lumR.mean.mean
## All.X##rcv#glm lumG.mad.mean.cut.fctr,lumG.mad.mean,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mean.mad,nImgs.log1p,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,CosSmlBG.mean,.pos,resX.mad.log1p,resX.mad.root2,CosSmlGR.mean,CosSmlRB.mean,resX.mad,lumB.mad.mean,CorBG.mean,lumR.mean.mad,resXY.mad.nexp,nImgs.root2,lumR.mad.mad,resY.mean.log1p,resY.mean.root2,resY.mean,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,CorRB.mean,resXY.mad.root2,resXY.mad,resX.mad.nexp,resXY.mad.log1p,nImgs,resX.mean.log1p,resX.mean.root2,resX.mean,resX.mean.nexp,resY.mean.nexp,lumB.mean.mean,CosSmlBG.mad,lumR.mean.mean
## max.nTuningRuns min.elapsedtime.everything
## MFO###myMFO_classfr 0 0.591
## Random###myrandom_classfr 0 0.359
## Max.cor.Y.rcv.1X1###glmnet 0 0.991
## Max.cor.Y##rcv#rpart 5 1.849
## Interact.High.cor.Y##rcv#glmnet 25 4.840
## Low.cor.X##rcv#glmnet 25 4.779
## All.X##rcv#glmnet 25 9.718
## All.X##rcv#glm 1 1.678
## min.elapsedtime.final max.AUCpROC.fit
## MFO###myMFO_classfr 0.003 0.5000000
## Random###myrandom_classfr 0.002 0.5333718
## Max.cor.Y.rcv.1X1###glmnet 0.020 0.5901332
## Max.cor.Y##rcv#rpart 0.015 0.5441372
## Interact.High.cor.Y##rcv#glmnet 0.303 0.6310795
## Low.cor.X##rcv#glmnet 0.270 0.6071750
## All.X##rcv#glmnet 0.519 0.6379960
## All.X##rcv#glm 0.091 0.6499781
## max.Sens.fit max.Spec.fit max.AUCROCR.fit
## MFO###myMFO_classfr 0.000 1.0000000 0.5000000
## Random###myrandom_classfr 0.476 0.4572565 0.5014672
## Max.cor.Y.rcv.1X1###glmnet 0.558 0.6222664 0.6302982
## Max.cor.Y##rcv#rpart 0.134 0.9542744 0.5441372
## Interact.High.cor.Y##rcv#glmnet 0.622 0.6401590 0.6668390
## Low.cor.X##rcv#glmnet 0.606 0.6083499 0.6433757
## All.X##rcv#glmnet 0.608 0.6679920 0.7005805
## All.X##rcv#glm 0.626 0.6739563 0.7059085
## opt.prob.threshold.fit max.f.score.fit
## MFO###myMFO_classfr 0.4 0.6679947
## Random###myrandom_classfr 0.4 0.6679947
## Max.cor.Y.rcv.1X1###glmnet 0.3 0.6684820
## Max.cor.Y##rcv#rpart 0.5 0.6779661
## Interact.High.cor.Y##rcv#glmnet 0.4 0.6687549
## Low.cor.X##rcv#glmnet 0.4 0.6699097
## All.X##rcv#glmnet 0.4 0.6849315
## All.X##rcv#glm 0.4 0.6861314
## max.Accuracy.fit max.AccuracyLower.fit
## MFO###myMFO_classfr 0.5014955 0.4700881
## Random###myrandom_classfr 0.5014955 0.4700881
## Max.cor.Y.rcv.1X1###glmnet 0.5144566 0.4830239
## Max.cor.Y##rcv#rpart 0.5569916 0.5139546
## Interact.High.cor.Y##rcv#glmnet 0.5998532 0.5470122
## Low.cor.X##rcv#glmnet 0.5955435 0.4949830
## All.X##rcv#glmnet 0.6001859 0.5792006
## All.X##rcv#glm 0.5875394 0.5832335
## max.AccuracyUpper.fit max.Kappa.fit
## MFO###myMFO_classfr 0.5328941 0.00000000
## Random###myrandom_classfr 0.5328941 0.00000000
## Max.cor.Y.rcv.1X1###glmnet 0.5458043 0.02621553
## Max.cor.Y##rcv#rpart 0.5765062 0.11337894
## Interact.High.cor.Y##rcv#glmnet 0.6090575 0.19964521
## Low.cor.X##rcv#glmnet 0.5577030 0.19106285
## All.X##rcv#glmnet 0.6404897 0.20032243
## All.X##rcv#glm 0.6444093 0.17495411
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB
## MFO###myMFO_classfr 0.5000000 0.00000000 1.000
## Random###myrandom_classfr 0.5084970 0.49899396 0.518
## Max.cor.Y.rcv.1X1###glmnet 0.5736479 0.54929577 0.598
## Max.cor.Y##rcv#rpart 0.5152716 0.09054326 0.940
## Interact.High.cor.Y##rcv#glmnet 0.5736479 0.54929577 0.598
## Low.cor.X##rcv#glmnet 0.5737143 0.57142857 0.576
## All.X##rcv#glmnet 0.5886781 0.55935614 0.618
## All.X##rcv#glm 0.5826358 0.54527163 0.620
## max.AUCROCR.OOB opt.prob.threshold.OOB
## MFO###myMFO_classfr 0.5000000 0.4
## Random###myrandom_classfr 0.4974668 0.4
## Max.cor.Y.rcv.1X1###glmnet 0.6006841 0.2
## Max.cor.Y##rcv#rpart 0.5152716 0.2
## Interact.High.cor.Y##rcv#glmnet 0.6175050 0.3
## Low.cor.X##rcv#glmnet 0.5990704 0.4
## All.X##rcv#glmnet 0.6040644 0.2
## All.X##rcv#glm 0.6019115 0.0
## max.f.score.OOB max.Accuracy.OOB
## MFO###myMFO_classfr 0.6680027 0.5015045
## Random###myrandom_classfr 0.6680027 0.5015045
## Max.cor.Y.rcv.1X1###glmnet 0.6684528 0.5035105
## Max.cor.Y##rcv#rpart 0.6680027 0.5015045
## Interact.High.cor.Y##rcv#glmnet 0.6737139 0.5356068
## Low.cor.X##rcv#glmnet 0.6717877 0.5285858
## All.X##rcv#glmnet 0.6716826 0.5185557
## All.X##rcv#glm 0.6680027 0.5015045
## max.AccuracyLower.OOB
## MFO###myMFO_classfr 0.4700015
## Random###myrandom_classfr 0.4700015
## Max.cor.Y.rcv.1X1###glmnet 0.4720022
## Max.cor.Y##rcv#rpart 0.4700015
## Interact.High.cor.Y##rcv#glmnet 0.5040813
## Low.cor.X##rcv#glmnet 0.4970530
## All.X##rcv#glmnet 0.4870233
## All.X##rcv#glm 0.4700015
## max.AccuracyUpper.OOB max.Kappa.OOB
## MFO###myMFO_classfr 0.5329987 0.000000000
## Random###myrandom_classfr 0.5329987 0.000000000
## Max.cor.Y.rcv.1X1###glmnet 0.5349981 0.004048264
## Max.cor.Y##rcv#rpart 0.5329987 0.000000000
## Interact.High.cor.Y##rcv#glmnet 0.5669215 0.068850191
## Low.cor.X##rcv#glmnet 0.5599492 0.054697965
## All.X##rcv#glmnet 0.5499782 0.034409843
## All.X##rcv#glm 0.5329987 0.000000000
## max.AccuracySD.fit max.KappaSD.fit
## MFO###myMFO_classfr NA NA
## Random###myrandom_classfr NA NA
## Max.cor.Y.rcv.1X1###glmnet NA NA
## Max.cor.Y##rcv#rpart 0.02065107 0.04176629
## Interact.High.cor.Y##rcv#glmnet 0.01928452 0.03869049
## Low.cor.X##rcv#glmnet 0.02538828 0.05085341
## All.X##rcv#glmnet 0.02935885 0.05876145
## All.X##rcv#glm 0.03042227 0.06088308
# Teardown of the fit.models_1 phase: drop the last model-fit return value and
# record the phase boundary for elapsed-time accounting
rm(ret_lst)
fit.models_1_chunk_df <-
myadd_chunk(fit.models_1_chunk_df, "fit.models_1_end", major.inc = FALSE,
label.minor = "teardown")
## label step_major step_minor label_minor bgn end
## 5 fit.models_1_preProc 1 4 preProc 300.666 300.732
## 6 fit.models_1_end 1 5 teardown 300.732 NA
## elapsed
## 5 0.066
## 6 NA
# Advance the top-level chunk log into the next fit.models sub-phase
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models", major.inc = FALSE)
## label step_major step_minor label_minor bgn end elapsed
## 17 fit.models 8 1 1 272.517 300.742 28.226
## 18 fit.models 8 2 2 300.743 NA NA
# Start a fresh chunk log for the fit.models_2 (model comparison / selection) phase
fit.models_2_chunk_df <-
myadd_chunk(NULL, "fit.models_2_bgn", label.minor = "setup")
## label step_major step_minor label_minor bgn end elapsed
## 1 fit.models_2_bgn 1 0 setup 304.123 NA NA
# Build a plotting copy of the models summary: drop CI/SD columns, then invert
# every "min."-prefixed metric (elapsed times) so that larger is uniformly
# better across all plotted metrics.
plt_models_df <- glb_models_df[, -grep("SD|Upper|Lower", names(glb_models_df))]
for (var in grep("^min.", names(plt_models_df), value=TRUE)) {
plt_models_df[, sub("min.", "inv.", var)] <-
#ifelse(all(is.na(tmp <- plt_models_df[, var])), NA, 1.0 / tmp)
1.0 / plt_models_df[, var]
# Drop the original column by literal name. The previous
# -grep(var, names(plt_models_df)) treated the column name as a regex, so the
# "."s in e.g. "min.elapsedtime.final" could match (and drop) other columns.
plt_models_df <- plt_models_df[, names(plt_models_df) != var, drop = FALSE]
}
print(plt_models_df)
## id
## MFO###myMFO_classfr MFO###myMFO_classfr
## Random###myrandom_classfr Random###myrandom_classfr
## Max.cor.Y.rcv.1X1###glmnet Max.cor.Y.rcv.1X1###glmnet
## Max.cor.Y##rcv#rpart Max.cor.Y##rcv#rpart
## Interact.High.cor.Y##rcv#glmnet Interact.High.cor.Y##rcv#glmnet
## Low.cor.X##rcv#glmnet Low.cor.X##rcv#glmnet
## All.X##rcv#glmnet All.X##rcv#glmnet
## All.X##rcv#glm All.X##rcv#glm
## feats
## MFO###myMFO_classfr .rnorm
## Random###myrandom_classfr .rnorm
## Max.cor.Y.rcv.1X1###glmnet lumR.mean.mean,lumG.mad.mean.cut.fctr
## Max.cor.Y##rcv#rpart lumR.mean.mean,lumG.mad.mean.cut.fctr
## Interact.High.cor.Y##rcv#glmnet lumR.mean.mean,lumG.mad.mean.cut.fctr,lumR.mean.mean:lumG.mad.mean.cut.fctr,lumR.mean.mean:lumG.mean.mad,lumR.mean.mean:nImgs.cut.fctr,lumR.mean.mean:CosSmlBG.mad,lumR.mean.mean:resX.mad.log1p,lumR.mean.mean:CosSmlBG.mean,lumR.mean.mean:CosSmlGR.mean,lumR.mean.mean:nImgs.log1p,lumR.mean.mean:resY.mean.log1p,lumR.mean.mean:resY.mean.root2,lumR.mean.mean:CorRB.mad,lumR.mean.mean:resXY.mad,lumR.mean.mean:resXY.mad.nexp,lumR.mean.mean:resX.mean,lumR.mean.mean:resX.mean.nexp,lumR.mean.mean:lumG.mean.mean
## Low.cor.X##rcv#glmnet lumG.mad.mean.cut.fctr,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,.pos,resX.mad.log1p,lumB.mad.mean,lumR.mean.mad,resXY.mad.nexp,lumR.mad.mad,resY.mean.log1p,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,resXY.mad,nImgs,resX.mean,resX.mean.nexp,CosSmlBG.mad,lumR.mean.mean
## All.X##rcv#glmnet lumG.mad.mean.cut.fctr,lumG.mad.mean,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mean.mad,nImgs.log1p,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,CosSmlBG.mean,.pos,resX.mad.log1p,resX.mad.root2,CosSmlGR.mean,CosSmlRB.mean,resX.mad,lumB.mad.mean,CorBG.mean,lumR.mean.mad,resXY.mad.nexp,nImgs.root2,lumR.mad.mad,resY.mean.log1p,resY.mean.root2,resY.mean,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,CorRB.mean,resXY.mad.root2,resXY.mad,resX.mad.nexp,resXY.mad.log1p,nImgs,resX.mean.log1p,resX.mean.root2,resX.mean,resX.mean.nexp,resY.mean.nexp,lumB.mean.mean,CosSmlBG.mad,lumR.mean.mean
## All.X##rcv#glm lumG.mad.mean.cut.fctr,lumG.mad.mean,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mean.mad,nImgs.log1p,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,CosSmlBG.mean,.pos,resX.mad.log1p,resX.mad.root2,CosSmlGR.mean,CosSmlRB.mean,resX.mad,lumB.mad.mean,CorBG.mean,lumR.mean.mad,resXY.mad.nexp,nImgs.root2,lumR.mad.mad,resY.mean.log1p,resY.mean.root2,resY.mean,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,CorRB.mean,resXY.mad.root2,resXY.mad,resX.mad.nexp,resXY.mad.log1p,nImgs,resX.mean.log1p,resX.mean.root2,resX.mean,resX.mean.nexp,resY.mean.nexp,lumB.mean.mean,CosSmlBG.mad,lumR.mean.mean
## max.nTuningRuns max.AUCpROC.fit
## MFO###myMFO_classfr 0 0.5000000
## Random###myrandom_classfr 0 0.5333718
## Max.cor.Y.rcv.1X1###glmnet 0 0.5901332
## Max.cor.Y##rcv#rpart 5 0.5441372
## Interact.High.cor.Y##rcv#glmnet 25 0.6310795
## Low.cor.X##rcv#glmnet 25 0.6071750
## All.X##rcv#glmnet 25 0.6379960
## All.X##rcv#glm 1 0.6499781
## max.Sens.fit max.Spec.fit max.AUCROCR.fit
## MFO###myMFO_classfr 0.000 1.0000000 0.5000000
## Random###myrandom_classfr 0.476 0.4572565 0.5014672
## Max.cor.Y.rcv.1X1###glmnet 0.558 0.6222664 0.6302982
## Max.cor.Y##rcv#rpart 0.134 0.9542744 0.5441372
## Interact.High.cor.Y##rcv#glmnet 0.622 0.6401590 0.6668390
## Low.cor.X##rcv#glmnet 0.606 0.6083499 0.6433757
## All.X##rcv#glmnet 0.608 0.6679920 0.7005805
## All.X##rcv#glm 0.626 0.6739563 0.7059085
## opt.prob.threshold.fit max.f.score.fit
## MFO###myMFO_classfr 0.4 0.6679947
## Random###myrandom_classfr 0.4 0.6679947
## Max.cor.Y.rcv.1X1###glmnet 0.3 0.6684820
## Max.cor.Y##rcv#rpart 0.5 0.6779661
## Interact.High.cor.Y##rcv#glmnet 0.4 0.6687549
## Low.cor.X##rcv#glmnet 0.4 0.6699097
## All.X##rcv#glmnet 0.4 0.6849315
## All.X##rcv#glm 0.4 0.6861314
## max.Accuracy.fit max.Kappa.fit
## MFO###myMFO_classfr 0.5014955 0.00000000
## Random###myrandom_classfr 0.5014955 0.00000000
## Max.cor.Y.rcv.1X1###glmnet 0.5144566 0.02621553
## Max.cor.Y##rcv#rpart 0.5569916 0.11337894
## Interact.High.cor.Y##rcv#glmnet 0.5998532 0.19964521
## Low.cor.X##rcv#glmnet 0.5955435 0.19106285
## All.X##rcv#glmnet 0.6001859 0.20032243
## All.X##rcv#glm 0.5875394 0.17495411
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB
## MFO###myMFO_classfr 0.5000000 0.00000000 1.000
## Random###myrandom_classfr 0.5084970 0.49899396 0.518
## Max.cor.Y.rcv.1X1###glmnet 0.5736479 0.54929577 0.598
## Max.cor.Y##rcv#rpart 0.5152716 0.09054326 0.940
## Interact.High.cor.Y##rcv#glmnet 0.5736479 0.54929577 0.598
## Low.cor.X##rcv#glmnet 0.5737143 0.57142857 0.576
## All.X##rcv#glmnet 0.5886781 0.55935614 0.618
## All.X##rcv#glm 0.5826358 0.54527163 0.620
## max.AUCROCR.OOB opt.prob.threshold.OOB
## MFO###myMFO_classfr 0.5000000 0.4
## Random###myrandom_classfr 0.4974668 0.4
## Max.cor.Y.rcv.1X1###glmnet 0.6006841 0.2
## Max.cor.Y##rcv#rpart 0.5152716 0.2
## Interact.High.cor.Y##rcv#glmnet 0.6175050 0.3
## Low.cor.X##rcv#glmnet 0.5990704 0.4
## All.X##rcv#glmnet 0.6040644 0.2
## All.X##rcv#glm 0.6019115 0.0
## max.f.score.OOB max.Accuracy.OOB
## MFO###myMFO_classfr 0.6680027 0.5015045
## Random###myrandom_classfr 0.6680027 0.5015045
## Max.cor.Y.rcv.1X1###glmnet 0.6684528 0.5035105
## Max.cor.Y##rcv#rpart 0.6680027 0.5015045
## Interact.High.cor.Y##rcv#glmnet 0.6737139 0.5356068
## Low.cor.X##rcv#glmnet 0.6717877 0.5285858
## All.X##rcv#glmnet 0.6716826 0.5185557
## All.X##rcv#glm 0.6680027 0.5015045
## max.Kappa.OOB inv.elapsedtime.everything
## MFO###myMFO_classfr 0.000000000 1.6920474
## Random###myrandom_classfr 0.000000000 2.7855153
## Max.cor.Y.rcv.1X1###glmnet 0.004048264 1.0090817
## Max.cor.Y##rcv#rpart 0.000000000 0.5408329
## Interact.High.cor.Y##rcv#glmnet 0.068850191 0.2066116
## Low.cor.X##rcv#glmnet 0.054697965 0.2092488
## All.X##rcv#glmnet 0.034409843 0.1029018
## All.X##rcv#glm 0.000000000 0.5959476
## inv.elapsedtime.final
## MFO###myMFO_classfr 333.333333
## Random###myrandom_classfr 500.000000
## Max.cor.Y.rcv.1X1###glmnet 50.000000
## Max.cor.Y##rcv#rpart 66.666667
## Interact.High.cor.Y##rcv#glmnet 3.300330
## Low.cor.X##rcv#glmnet 3.703704
## All.X##rcv#glmnet 1.926782
## All.X##rcv#glm 10.989011
# print(myplot_radar(radar_inp_df=plt_models_df))
# print(myplot_radar(radar_inp_df=subset(plt_models_df,
# !(mdl_id %in% grep("random|MFO", plt_models_df$id, value=TRUE)))))
# Compute CI for <metric>SD
# Derive t-distribution confidence intervals for every cross-validated metric
# that has a "<metric>SD<suffix>" column. max.df is the t degrees of freedom
# (tuning runs - 1); min.sd2ci.scaler is the two-sided 95% t quantile used to
# scale each SD into a half-width. Models with <= 1 tuning run get NA scalers.
glb_models_df <- mutate(glb_models_df,
max.df = ifelse(max.nTuningRuns > 1, max.nTuningRuns - 1, NA),
min.sd2ci.scaler = ifelse(is.na(max.df), NA, qt(0.975, max.df)))
for (var in grep("SD", names(glb_models_df), value=TRUE)) {
# Does CI already exist ?
var_components <- unlist(strsplit(var, "SD"))
# e.g. "max.AccuracySD.fit" -> actual "max.Accuracy.fit",
# bounds "max.AccuracyUpper.fit" / "max.AccuracyLower.fit"
varActul <- paste0(var_components[1], var_components[2])
varUpper <- paste0(var_components[1], "Upper", var_components[2])
varLower <- paste0(var_components[1], "Lower", var_components[2])
if (varUpper %in% names(glb_models_df)) {
warning(varUpper, " already exists in glb_models_df")
# Assuming Lower also exists
next
}
print(sprintf("var:%s", var))
# CI is dependent on sample size in t distribution; df=n-1
glb_models_df[, varUpper] <- glb_models_df[, varActul] +
glb_models_df[, "min.sd2ci.scaler"] * glb_models_df[, var]
glb_models_df[, varLower] <- glb_models_df[, varActul] -
glb_models_df[, "min.sd2ci.scaler"] * glb_models_df[, var]
}
## Warning: max.AccuracyUpper.fit already exists in glb_models_df
## [1] "var:max.KappaSD.fit"
# Plot metrics with CI
# Rebuild the plotting frames from the models summary: plt_models_df collects
# each point estimate that has CI bounds; pltCI_models_df collects the
# matching Upper/Lower bound columns. Both keep "id" as the key column.
plt_models_df <- glb_models_df[, "id", FALSE]
pltCI_models_df <- glb_models_df[, "id", FALSE]
for (upperVar in grep("Upper", names(glb_models_df), value = TRUE)) {
parts <- unlist(strsplit(upperVar, "Upper"))
# Point-estimate column = the upper-bound name with "Upper" removed
statVar <- paste(parts, collapse = "")
plt_models_df[, statVar] <- glb_models_df[, statVar]
# Copy both bound columns for this metric into the CI frame
ciVars <- paste0(parts[1], c("Upper", "Lower"), parts[2])
for (ciVar in ciVars)
pltCI_models_df[, ciVar] <- glb_models_df[, ciVar]
}
# Convert a wide per-model stats frame (one column per "<metric>.<sample>")
# into long format. Each long row is annotated with:
#   data  - the sample partition, i.e. the final "."-separated token of the
#           variable name (e.g. "fit" / "OOB")
#   label - the metric name, i.e. the variable name with its ".<data>"
#           suffix stripped
# Returns the melted data frame with the extra "data" and "label" columns.
build_statsCI_data <- function(plt_models_df) {
mltd_models_df <- melt(plt_models_df, id.vars="id")
varNames <- as.character(mltd_models_df$variable)
mltd_models_df$data <- sapply(seq_len(nrow(mltd_models_df)), function(ix)
tail(unlist(strsplit(varNames[ix], "[.]")), 1))
mltd_models_df$label <- sapply(seq_len(nrow(mltd_models_df)), function(ix)
head(unlist(strsplit(varNames[ix],
paste0(".", mltd_models_df[ix, "data"]))), 1))
#print(mltd_models_df)
return(mltd_models_df)
}
# Melt the point estimates and the CI bounds to long format, then annotate
# each CI row with its metric name ("label"), sample partition ("data"),
# and bound type ("Upper"/"Lower") parsed out of the variable name.
mltd_models_df <- build_statsCI_data(plt_models_df)
mltdCI_models_df <- melt(pltCI_models_df, id.vars="id")
for (row_ix in 1:nrow(mltdCI_models_df)) {
for (type in c("Upper", "Lower")) {
# A successful split on "Upper"/"Lower" yields > 1 pieces, identifying
# this row's bound type; e.g. "max.AccuracyUpper.fit" ->
# label "max.Accuracy", data "fit", type "Upper"
if (length(var_components <- unlist(strsplit(
as.character(mltdCI_models_df[row_ix, "variable"]), type))) > 1) {
#print(sprintf("row_ix:%d; type:%s; ", row_ix, type))
mltdCI_models_df[row_ix, "label"] <- var_components[1]
mltdCI_models_df[row_ix, "data"] <-
unlist(strsplit(var_components[2], "[.]"))[2]
mltdCI_models_df[row_ix, "type"] <- type
# Each variable name matches exactly one bound type; stop looking
break
}
}
}
# Pivot the CI rows wide so each (id, label, data) row carries value.Upper and
# value.Lower side by side, then attach the matching point estimates.
wideCI_models_df <- reshape(subset(mltdCI_models_df, select=-variable),
timevar="type",
idvar=setdiff(names(mltdCI_models_df), c("type", "value", "variable")),
direction="wide")
#print(wideCI_models_df)
mrgdCI_models_df <- merge(wideCI_models_df, mltd_models_df, all.x=TRUE)
#print(mrgdCI_models_df)
# Merge stats back in if CIs don't exist
# Cross every metric label with every sample partition; any combination that
# is missing from the melted point estimates is queued for a second pass.
goback_vars <- c()
for (var in unique(mltd_models_df$label)) {
for (type in unique(mltd_models_df$data)) {
var_type <- paste0(var, ".", type)
# if this data is already present, next
if (var_type %in% unique(paste(mltd_models_df$label, mltd_models_df$data,
sep=".")))
next
#print(sprintf("var_type:%s", var_type))
goback_vars <- c(goback_vars, var_type)
}
}
# Pull the missing metric columns straight from glb_models_df and append them
if (length(goback_vars) > 0) {
mltd_goback_df <- build_statsCI_data(glb_models_df[, c("id", goback_vars)])
mltd_models_df <- rbind(mltd_models_df, mltd_goback_df)
}
# mltd_models_df <- merge(mltd_models_df, glb_models_df[, c("id", "model_method")],
# all.x=TRUE)
# Render the per-metric model comparison as a faceted bar chart (metrics as
# rows, fit/OOB as columns) with CI error bars, written to a 3x2 PNG.
png(paste0(glbOut$pfx, "models_bar.png"), width=480*3, height=480*2)
#print(gp <- myplot_bar(mltd_models_df, "id", "value", colorcol_name="model_method") +
# NOTE(review): aes(x=mdl_id) but mrgdCI_models_df's key column appears to be
# "id" (see the melts above) — confirm mdl_id resolves as intended here
print(gp <- myplot_bar(df=mltd_models_df, xcol_name="id", ycol_names="value") +
geom_errorbar(data=mrgdCI_models_df,
mapping=aes(x=mdl_id, ymax=value.Upper, ymin=value.Lower), width=0.5) +
facet_grid(label ~ data, scales="free") +
theme(axis.text.x = element_text(angle = 90,vjust = 0.5)))
## Warning: Removed 4 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen
## 2
# Re-print to the interactive device (the png device above has been closed)
print(gp)
## Warning: Removed 4 rows containing missing values (geom_errorbar).
# Columns to display when ranking models: id, the evaluation metrics actually
# present in the summary, and any optimal-threshold columns ("opt." matched
# literally via fixed = TRUE)
dsp_models_cols <- c("id",
glbMdlMetricsEval[glbMdlMetricsEval %in% names(glb_models_df)],
grep("opt.", names(glb_models_df), fixed = TRUE, value = TRUE))
# if (glb_is_classification && glb_is_binomial)
# dsp_models_cols <- c(dsp_models_cols, "opt.prob.threshold.OOB")
# Rank models by the selection formula (best first) and show chosen columns
print(dsp_models_df <- orderBy(get_model_sel_frmla(), glb_models_df)[, dsp_models_cols])
## id max.Accuracy.OOB max.AUCROCR.OOB
## 5 Interact.High.cor.Y##rcv#glmnet 0.5356068 0.6175050
## 6 Low.cor.X##rcv#glmnet 0.5285858 0.5990704
## 7 All.X##rcv#glmnet 0.5185557 0.6040644
## 3 Max.cor.Y.rcv.1X1###glmnet 0.5035105 0.6006841
## 8 All.X##rcv#glm 0.5015045 0.6019115
## 4 Max.cor.Y##rcv#rpart 0.5015045 0.5152716
## 1 MFO###myMFO_classfr 0.5015045 0.5000000
## 2 Random###myrandom_classfr 0.5015045 0.4974668
## max.AUCpROC.OOB max.Accuracy.fit opt.prob.threshold.fit
## 5 0.5736479 0.5998532 0.4
## 6 0.5737143 0.5955435 0.4
## 7 0.5886781 0.6001859 0.4
## 3 0.5736479 0.5144566 0.3
## 8 0.5826358 0.5875394 0.4
## 4 0.5152716 0.5569916 0.5
## 1 0.5000000 0.5014955 0.4
## 2 0.5084970 0.5014955 0.4
## opt.prob.threshold.OOB
## 5 0.3
## 6 0.4
## 7 0.2
## 3 0.2
## 8 0.0
## 4 0.2
## 1 0.4
## 2 0.4
# print(myplot_radar(radar_inp_df = dsp_models_df))
# Report the ranking formula and the top-ranked (best) model id
print("Metrics used for model selection:"); print(get_model_sel_frmla())
## [1] "Metrics used for model selection:"
## ~-max.Accuracy.OOB - max.AUCROCR.OOB - max.AUCpROC.OOB - max.Accuracy.fit -
## opt.prob.threshold.OOB
## <environment: 0x7fdd55e3fcd8>
print(sprintf("Best model id: %s", dsp_models_df[1, "id"]))
## [1] "Best model id: Interact.High.cor.Y##rcv#glmnet"
# Score observations with a previously fitted model and append prediction
# diagnostics as new columns on df.
#
# df                 - data frame of observations to score
# mdl_id             - key into the global glb_models_lst of fitted models
# rsp_var            - response-variable name (drives output column naming
#                      via mygetPredictIds)
# prob_threshold_def - fallback classification cutoff used when the model has
#                      no "opt.prob.threshold.OOB" entry in glb_models_df
# verbose            - when TRUE, print diagnostic plots / worst-error rows
#
# Returns df with columns added for: predicted value, predicted probability
# (classification only), signed error, absolute error, and an accuracy flag.
# Exactly one of the three branches runs, driven by the globals
# glb_is_regression / glb_is_classification / glb_is_binomial.
glb_get_predictions <- function(df, mdl_id, rsp_var, prob_threshold_def=NULL, verbose=FALSE) {
mdl <- glb_models_lst[[mdl_id]]
# Resolve the output column names for this (response, model) pair
clmnNames <- mygetPredictIds(rsp_var, mdl_id)
predct_var_name <- clmnNames$value
predct_prob_var_name <- clmnNames$prob
predct_accurate_var_name <- clmnNames$is.acc
predct_error_var_name <- clmnNames$err
predct_erabs_var_name <- clmnNames$err.abs
if (glb_is_regression) {
df[, predct_var_name] <- predict(mdl, newdata=df, type="raw")
if (verbose) print(myplot_scatter(df, glb_rsp_var, predct_var_name) +
facet_wrap(reformulate(glbFeatsCategory), scales = "free") +
stat_smooth(method="glm"))
# Signed residual: prediction minus actual
df[, predct_error_var_name] <- df[, predct_var_name] - df[, glb_rsp_var]
if (verbose) print(myplot_scatter(df, predct_var_name, predct_error_var_name) +
#facet_wrap(reformulate(glbFeatsCategory), scales = "free") +
stat_smooth(method="auto"))
if (verbose) print(myplot_scatter(df, glb_rsp_var, predct_error_var_name) +
#facet_wrap(reformulate(glbFeatsCategory), scales = "free") +
stat_smooth(method="glm"))
df[, predct_erabs_var_name] <- abs(df[, predct_error_var_name])
if (verbose) print(head(orderBy(reformulate(c("-", predct_erabs_var_name)), df)))
# NOTE(review): exact == on continuous predictions is almost always FALSE;
# an accuracy flag for regression likely needs a tolerance — confirm intent
df[, predct_accurate_var_name] <- (df[, glb_rsp_var] == df[, predct_var_name])
}
if (glb_is_classification && glb_is_binomial) {
# Use the OOB-optimized cutoff recorded for this model, else the caller's
# default; error out if neither is available
prob_threshold <- glb_models_df[glb_models_df$id == mdl_id,
"opt.prob.threshold.OOB"]
if (is.null(prob_threshold) || is.na(prob_threshold)) {
warning("Using default probability threshold: ", prob_threshold_def)
if (is.null(prob_threshold <- prob_threshold_def))
stop("Default probability threshold is NULL")
}
# Column 2 of the prob matrix is taken as the positive class — this assumes
# the positive class is the second factor level; TODO confirm
df[, predct_prob_var_name] <- predict(mdl, newdata = df, type = "prob")[, 2]
# prob >= threshold maps to level 2, otherwise level 1, of the response
df[, predct_var_name] <-
factor(levels(df[, glb_rsp_var])[
(df[, predct_prob_var_name] >=
prob_threshold) * 1 + 1], levels(df[, glb_rsp_var]))
# if (verbose) print(myplot_scatter(df, glb_rsp_var, predct_var_name) +
# facet_wrap(reformulate(glbFeatsCategory), scales = "free") +
# stat_smooth(method="glm"))
df[, predct_error_var_name] <- df[, predct_var_name] != df[, glb_rsp_var]
# if (verbose) print(myplot_scatter(df, predct_var_name, predct_error_var_name) +
# #facet_wrap(reformulate(glbFeatsCategory), scales = "free") +
# stat_smooth(method="auto"))
# if (verbose) print(myplot_scatter(df, glb_rsp_var, predct_error_var_name) +
# #facet_wrap(reformulate(glbFeatsCategory), scales = "free") +
# stat_smooth(method="glm"))
# Absolute error = distance of the predicted probability from the ideal
# probability for that outcome (1.0 for actual positives, 0.0 for negatives)
# if prediction is a TP (true +ve), measure distance from 1.0
tp <- which((df[, predct_var_name] == df[, glb_rsp_var]) &
(df[, predct_var_name] == levels(df[, glb_rsp_var])[2]))
df[tp, predct_erabs_var_name] <- abs(1 - df[tp, predct_prob_var_name])
#rowIx <- which.max(df[tp, predct_erabs_var_name]); df[tp, c(glbFeatsId, glb_rsp_var, predct_var_name, predct_prob_var_name, predct_erabs_var_name)][rowIx, ]
# if prediction is a TN (true -ve), measure distance from 0.0
tn <- which((df[, predct_var_name] == df[, glb_rsp_var]) &
(df[, predct_var_name] == levels(df[, glb_rsp_var])[1]))
df[tn, predct_erabs_var_name] <- abs(0 - df[tn, predct_prob_var_name])
#rowIx <- which.max(df[tn, predct_erabs_var_name]); df[tn, c(glbFeatsId, glb_rsp_var, predct_var_name, predct_prob_var_name, predct_erabs_var_name)][rowIx, ]
# if prediction is a FP (flse +ve), measure distance from 0.0
fp <- which((df[, predct_var_name] != df[, glb_rsp_var]) &
(df[, predct_var_name] == levels(df[, glb_rsp_var])[2]))
df[fp, predct_erabs_var_name] <- abs(0 - df[fp, predct_prob_var_name])
#rowIx <- which.max(df[fp, predct_erabs_var_name]); df[fp, c(glbFeatsId, glb_rsp_var, predct_var_name, predct_prob_var_name, predct_erabs_var_name)][rowIx, ]
# if prediction is a FN (flse -ve), measure distance from 1.0
fn <- which((df[, predct_var_name] != df[, glb_rsp_var]) &
(df[, predct_var_name] == levels(df[, glb_rsp_var])[1]))
df[fn, predct_erabs_var_name] <- abs(1 - df[fn, predct_prob_var_name])
#rowIx <- which.max(df[fn, predct_erabs_var_name]); df[fn, c(glbFeatsId, glb_rsp_var, predct_var_name, predct_prob_var_name, predct_erabs_var_name)][rowIx, ]
if (verbose) print(head(orderBy(reformulate(c("-", predct_erabs_var_name)), df)))
df[, predct_accurate_var_name] <- (df[, glb_rsp_var] == df[, predct_var_name])
}
if (glb_is_classification && !glb_is_binomial) {
df[, predct_var_name] <- predict(mdl, newdata = df, type = "raw")
probCls <- predict(mdl, newdata = df, type = "prob")
# Record the probability of whichever class was predicted for each row
df[, predct_prob_var_name] <- NA
for (cls in names(probCls)) {
mask <- (df[, predct_var_name] == cls)
df[mask, predct_prob_var_name] <- probCls[mask, cls]
}
if (verbose) print(myplot_histogram(df, predct_prob_var_name,
fill_col_name = predct_var_name))
if (verbose) print(myplot_histogram(df, predct_prob_var_name,
facet_frmla = paste0("~", glb_rsp_var)))
df[, predct_error_var_name] <- df[, predct_var_name] != df[, glb_rsp_var]
# if prediction is erroneous, measure predicted class prob from actual class prob
df[, predct_erabs_var_name] <- 0
for (cls in names(probCls)) {
mask <- (df[, glb_rsp_var] == cls) & (df[, predct_error_var_name])
df[mask, predct_erabs_var_name] <- probCls[mask, cls]
}
df[, predct_accurate_var_name] <- (df[, glb_rsp_var] == df[, predct_var_name])
}
return(df)
}
#stop(here"); glb2Sav(); glbObsAll <- savObsAll; glbObsTrn <- savObsTrn; glbObsFit <- savObsFit; glbObsOOB <- savObsOOB; sav_models_df <- glb_models_df; glb_models_df <- sav_models_df; glb_featsimp_df <- sav_featsimp_df
# Summarize absolute prediction error per feature category for one model.
#
# obs_df - observations; if the model's prediction columns are absent they
#          are computed via glb_get_predictions first
# mdl_id - model identifier (used to derive prediction column names)
# label  - tag appended to the output column names (e.g. "fit", "OOB")
#
# Returns one row per glbFeatsCategory level with columns:
#   err.abs.<label>.sum, err.abs.<label>.mean, .n.<label>
# Relies on globals: glb_rsp_var, glbFeatsCategory, glb_get_predictions.
# NOTE(review): group_by_/summarise_ + lazyeval::interp are the deprecated
# standard-evaluation dplyr API — works on dplyr of this era; confirm before
# upgrading dplyr
myget_category_stats <- function(obs_df, mdl_id, label) {
require(dplyr)
require(lazyeval)
predct_var_name <- mygetPredictIds(glb_rsp_var, mdl_id)$value
predct_error_var_name <- mygetPredictIds(glb_rsp_var, mdl_id)$err.abs
if (!predct_var_name %in% names(obs_df))
obs_df <- glb_get_predictions(obs_df, mdl_id, glb_rsp_var)
# Keep only the columns needed for the aggregation
tmp_obs_df <- obs_df[, c(glbFeatsCategory, glb_rsp_var,
predct_var_name, predct_error_var_name)]
# tmp_obs_df <- obs_df %>%
# dplyr::select_(glbFeatsCategory, glb_rsp_var, predct_var_name, predct_error_var_name)
#dplyr::rename(startprice.log10.predict.RFE.X.glmnet.err=error_abs_OOB)
# Rename the abs-error column (last column) to a label-specific name
names(tmp_obs_df)[length(names(tmp_obs_df))] <- paste0("err.abs.", label)
# Per-category sum & mean of absolute error, plus observation counts
ret_ctgry_df <- tmp_obs_df %>%
dplyr::group_by_(glbFeatsCategory) %>%
dplyr::summarise_(#interp(~sum(abs(var)), var=as.name(glb_rsp_var)),
interp(~sum(var), var=as.name(paste0("err.abs.", label))),
interp(~mean(var), var=as.name(paste0("err.abs.", label))),
interp(~n()))
# Replace auto-generated summarise names with deterministic ones
names(ret_ctgry_df) <- c(glbFeatsCategory,
#paste0(glb_rsp_var, ".abs.", label, ".sum"),
paste0("err.abs.", label, ".sum"),
paste0("err.abs.", label, ".mean"),
paste0(".n.", label))
ret_ctgry_df <- dplyr::ungroup(ret_ctgry_df)
#colSums(ret_ctgry_df[, -grep(glbFeatsCategory, names(ret_ctgry_df))])
return(ret_ctgry_df)
}
#print(colSums((ctgry_df <- myget_category_stats(obs_df=glbObsFit, mdl_id="", label="fit"))[, -grep(glbFeatsCategory, names(ctgry_df))]))
# Optionally fit an ensemble ("stacked") model whose predictors are the
# out-of-fold predictions of previously fitted component models.
# glb_mdl_ensemble controls the behavior: NULL (skip), "auto" (pick
# components automatically), a "%<d-%"-prefixed expression (evaluated),
# or an explicit character vector of model ids.
if (!is.null(glb_mdl_ensemble)) {
fit.models_2_chunk_df <- myadd_chunk(fit.models_2_chunk_df,
paste0("fit.models_2_", mdl_id_pfx), major.inc = TRUE,
label.minor = "ensemble")
mdl_id_pfx <- "Ensemble"
# Ensembles are only implemented for regression and binomial classification
if (#(glb_is_regression) |
((glb_is_classification) & (!glb_is_binomial)))
stop("Ensemble models not implemented yet for multinomial classification")
# Auto-selection: every model ranked better (per get_model_sel_frmla())
# than the first baseline-style model (MFO / Random / Baseline),
# excluding any previously built ensembles.
mygetEnsembleAutoMdlIds <- function() {
tmp_models_df <- orderBy(get_model_sel_frmla(), glb_models_df)
row.names(tmp_models_df) <- tmp_models_df$id
mdl_threshold_pos <-
min(which(grepl("MFO|Random|Baseline", tmp_models_df$id))) - 1
mdlIds <- tmp_models_df$id[1:mdl_threshold_pos]
return(mdlIds[!grepl("Ensemble", mdlIds)])
}
if (glb_mdl_ensemble == "auto") {
glb_mdl_ensemble <- mygetEnsembleAutoMdlIds()
mdl_id_pfx <- paste0(mdl_id_pfx, ".auto")
} else if (grepl("^%<d-%", glb_mdl_ensemble)) {
# NOTE(review): eval(parse(...)) executes the configuration string —
# confirm glb_mdl_ensemble always comes from a trusted source.
glb_mdl_ensemble <- eval(parse(text =
str_trim(unlist(strsplit(glb_mdl_ensemble, "%<d-%"))[2])))
}
# Ensure each component model's predictions exist on the fit & OOB frames;
# unknown ids are skipped with a warning rather than aborting the run.
for (mdl_id in glb_mdl_ensemble) {
if (!(mdl_id %in% names(glb_models_lst))) {
warning("Model ", mdl_id, " in glb_model_ensemble not found !")
next
}
glbObsFit <- glb_get_predictions(df = glbObsFit, mdl_id, glb_rsp_var)
glbObsOOB <- glb_get_predictions(df = glbObsOOB, mdl_id, glb_rsp_var)
}
#mdl_id_pfx <- "Ensemble.RFE"; mdlId <- paste0(mdl_id_pfx, ".glmnet")
#glb_mdl_ensemble <- gsub(mygetPredictIds$value, "", grep("RFE\\.X\\.(?!Interact)", row.names(glb_featsimp_df), perl = TRUE, value = TRUE), fixed = TRUE)
#varImp(glb_models_lst[[mdlId]])
#cor_df <- data.frame(cor=cor(glbObsFit[, glb_rsp_var], glbObsFit[, paste(mygetPredictIds$value, glb_mdl_ensemble)], use="pairwise.complete.obs"))
#glbObsFit <- glb_get_predictions(df=glbObsFit, "Ensemble.glmnet", glb_rsp_var);print(colSums((ctgry_df <- myget_category_stats(obs_df=glbObsFit, mdl_id="Ensemble.glmnet", label="fit"))[, -grep(glbFeatsCategory, names(ctgry_df))]))
### bid0_sp
# Better than MFO; models.n=28; min.RMSE.fit=0.0521233; err.abs.fit.sum=7.3631895
# old: Top x from auto; models.n= 5; min.RMSE.fit=0.06311047; err.abs.fit.sum=9.5937080
# RFE only ; models.n=16; min.RMSE.fit=0.05148588; err.abs.fit.sum=7.2875091
# RFE subset only ;models.n= 5; min.RMSE.fit=0.06040702; err.abs.fit.sum=9.059088
# RFE subset only ;models.n= 9; min.RMSE.fit=0.05933167; err.abs.fit.sum=8.7421288
# RFE subset only ;models.n=15; min.RMSE.fit=0.0584607; err.abs.fit.sum=8.5902066
# RFE subset only ;models.n=17; min.RMSE.fit=0.05496899; err.abs.fit.sum=8.0170431
# RFE subset only ;models.n=18; min.RMSE.fit=0.05441577; err.abs.fit.sum=7.837223
# RFE subset only ;models.n=16; min.RMSE.fit=0.05441577; err.abs.fit.sum=7.837223
### bid0_sp
### bid1_sp
# "auto"; err.abs.fit.sum=76.699774; min.RMSE.fit=0.2186429
# "RFE.X.*"; err.abs.fit.sum=; min.RMSE.fit=0.221114
### bid1_sp
# Build the ensemble's predictor names: <predict-prefix><component id>
# (with a ".prob" suffix for classification probabilities).
indep_vars <- paste(mygetPredictIds(glb_rsp_var)$value, glb_mdl_ensemble, sep = "")
if (glb_is_classification)
indep_vars <- paste(indep_vars, ".prob", sep = "")
# Some models in glb_mdl_ensemble might not be fitted e.g. RFE.X.Interact
indep_vars <- intersect(indep_vars, names(glbObsFit))
# indep_vars <- grep(mygetPredictIds(glb_rsp_var)$value, names(glbObsFit), fixed=TRUE, value=TRUE)
# if (glb_is_regression)
# indep_vars <- indep_vars[!grepl("(err\\.abs|accurate)$", indep_vars)]
# if (glb_is_classification && glb_is_binomial)
# indep_vars <- grep("prob$", indep_vars, value=TRUE) else
# indep_vars <- indep_vars[!grepl("err$", indep_vars)]
#rfe_fit_ens_results <- myrun_rfe(glbObsFit, indep_vars)
# Fit the stacking meta-model with glm and glmnet over several
# resampling schemes; glm is restricted to repeatedcv (outlier check only).
for (method in c("glm", "glmnet")) {
for (trainControlMethod in
c("boot", "boot632", "cv", "repeatedcv"
#, "LOOCV" # tuneLength * nrow(fitDF)
, "LGOCV", "adaptive_cv"
#, "adaptive_boot" #error: adaptive$min should be less than 3
#, "adaptive_LGOCV" #error: adaptive$min should be less than 3
)) {
#sav_models_df <- glb_models_df; all.equal(sav_models_df, glb_models_df)
#glb_models_df <- sav_models_df; print(glb_models_df$id)
if ((method == "glm") && (trainControlMethod != "repeatedcv"))
# glm used only to identify outliers
next
ret_lst <- myfit_mdl(
mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
id.prefix = paste0(mdl_id_pfx, ".", trainControlMethod),
type = glb_model_type, tune.df = NULL,
trainControl.method = trainControlMethod,
trainControl.number = glb_rcv_n_folds,
trainControl.repeats = glb_rcv_n_repeats,
trainControl.classProbs = glb_is_classification,
trainControl.summaryFunction = glbMdlMetricSummaryFn,
train.metric = glbMdlMetricSummary,
train.maximize = glbMdlMetricMaximize,
train.method = method)),
indep_vars = indep_vars, rsp_var = glb_rsp_var,
fit_df = glbObsFit, OOB_df = glbObsOOB)
}
}
# Refresh the model-ranking display table to include the new ensembles
dsp_models_df <- get_dsp_models_df()
}
# Resolve the selected model: honor a user-specified glb_sel_mdl_id when
# present, otherwise default to the top-ranked entry in dsp_models_df.
if (is.null(glb_sel_mdl_id)) {
  glb_sel_mdl_id <- dsp_models_df[1, "id"]
} else {
  print(sprintf("User specified selection: %s", glb_sel_mdl_id))
}
## [1] "User specified selection: All.X##rcv#glmnet"
# Cache the model object and print its summary.
myprint_mdl(glb_sel_mdl <- glb_models_lst[[glb_sel_mdl_id]])
## Length Class Mode
## a0 100 -none- numeric
## beta 5900 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 59 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept) .pos
## -1.449891e+01 2.643456e-05
## CorBG.mad CorBG.mean
## 1.720048e+01 6.533482e+00
## CorGR.mad CorGR.mean
## -6.264303e-01 -1.131432e+00
## CorRB.mad CorRB.mean
## 1.954783e+00 8.972890e-01
## CosSmlBG.mad CosSmlBG.mean
## -8.264686e+01 -2.620369e+01
## CosSmlGR.mad CosSmlGR.mean
## 6.276494e+00 -3.670034e+00
## CosSmlRB.mad CosSmlRB.mean
## 7.696698e+00 -4.111970e+00
## lumB.mad.mad lumB.mad.mean
## -2.546541e+00 -1.559111e+01
## lumB.mean.mad lumB.mean.mean
## -5.670072e+00 1.981223e+00
## lumG.mad.mad lumG.mad.mean
## 6.923609e+00 1.453262e+01
## lumG.mad.mean.cut.fctr(0.21,0.22] lumG.mad.mean.cut.fctr(0.22,0.23]
## 9.798029e-02 6.791072e-02
## lumG.mad.mean.cut.fctr(0.23,0.37] lumG.mean.mad
## 3.430964e-01 8.539465e+00
## lumG.mean.mean lumR.mad.mad
## 2.013598e+00 -3.305769e+00
## lumR.mad.mean lumR.mean.mad
## 7.334658e+00 1.463017e+00
## lumR.mean.mean nImgs
## -1.167878e+01 -8.504570e-04
## nImgs.cut.fctr(32,60] nImgs.cut.fctr(60,120]
## 3.735109e-02 -1.917323e-01
## nImgs.cut.fctr(120,3e+03] nImgs.log1p
## -1.918515e-02 4.070804e-01
## nImgs.nexp nImgs.root2
## 2.208673e+01 -2.659685e-02
## resX.mad resX.mad.log1p
## -5.377572e-03 2.224490e-01
## resX.mad.nexp resX.mad.root2
## 4.085039e-01 1.156796e-02
## resX.mean resX.mean.log1p
## -2.197819e-05 3.836671e+00
## resX.mean.nexp resXY.mad
## -9.900000e+35 -2.139297e-05
## resXY.mad.log1p resXY.mad.nexp
## 1.112064e-02 5.126033e-01
## resXY.mad.root2 resXY.mean
## 6.778941e-03 -2.783972e-05
## resXY.mean.log1p resXY.mean.root2
## 1.388996e+00 -1.420357e-05
## resY.mad resY.mad.log1p
## -9.871835e-05 1.166314e-02
## resY.mad.nexp resY.mean
## 5.563463e-02 6.279220e-03
## resY.mean.log1p resY.mean.nexp
## 8.937997e-02 -9.900000e+35
## resY.mean.root2
## 1.428268e-01
## [1] "max lambda < lambdaOpt:"
## (Intercept) .pos
## -1.766701e+01 2.665889e-05
## CorBG.mad CorBG.mean
## 1.742576e+01 6.902726e+00
## CorGR.mad CorGR.mean
## -6.735344e-01 -1.240569e+00
## CorRB.mad CorRB.mean
## 1.949542e+00 8.784349e-01
## CosSmlBG.mad CosSmlBG.mean
## -8.329374e+01 -2.699752e+01
## CosSmlGR.mad CosSmlGR.mean
## 6.306698e+00 -3.525151e+00
## CosSmlRB.mad CosSmlRB.mean
## 7.817653e+00 -4.029393e+00
## lumB.mad.mad lumB.mad.mean
## -2.520761e+00 -1.581341e+01
## lumB.mean.mad lumB.mean.mean
## -5.742412e+00 2.133768e+00
## lumG.mad.mad lumG.mad.mean
## 6.943085e+00 1.469989e+01
## lumG.mad.mean.cut.fctr(0.21,0.22] lumG.mad.mean.cut.fctr(0.22,0.23]
## 9.642240e-02 6.698197e-02
## lumG.mad.mean.cut.fctr(0.23,0.37] lumG.mean.mad
## 3.408218e-01 8.580246e+00
## lumG.mean.mean lumR.mad.mad
## 1.911026e+00 -3.354100e+00
## lumR.mad.mean lumR.mean.mad
## 7.462387e+00 1.489701e+00
## lumR.mean.mean nImgs
## -1.175617e+01 -8.254572e-04
## nImgs.cut.fctr(32,60] nImgs.cut.fctr(60,120]
## 3.124532e-02 -2.013657e-01
## nImgs.cut.fctr(120,3e+03] nImgs.log1p
## -3.071014e-02 4.224753e-01
## nImgs.nexp nImgs.root2
## 2.269017e+01 -2.932907e-02
## resX.mad resX.mad.log1p
## -5.536788e-03 2.299891e-01
## resX.mad.nexp resX.mad.root2
## 4.280276e-01 1.175269e-02
## resX.mean resX.mean.log1p
## -3.772421e-04 3.963698e+00
## resX.mean.nexp resXY.mad
## -9.900000e+35 -2.193618e-05
## resXY.mad.log1p resXY.mad.nexp
## 1.273738e-02 5.303176e-01
## resXY.mad.root2 resXY.mean
## 6.912089e-03 -2.953664e-05
## resXY.mean.log1p resXY.mean.root2
## 1.683664e+00 -1.491646e-05
## resY.mad resY.mad.log1p
## -1.289518e-04 1.332490e-02
## resY.mad.nexp resY.mean
## 6.073816e-02 6.557304e-03
## resY.mean.nexp resY.mean.root2
## -9.900000e+35 1.408124e-01
## [1] TRUE
# From here through save(), these steps belong in one function; the same
# sequence runs twice more in the fit.data.training and predict.data.new
# chunks.
print(sprintf("%s fit prediction diagnostics:", glb_sel_mdl_id))
## [1] "All.X##rcv#glmnet fit prediction diagnostics:"
glbObsFit <- glb_get_predictions(df = glbObsFit,
                                 mdl_id = glb_sel_mdl_id,
                                 rsp_var = glb_rsp_var)
print(sprintf("%s OOB prediction diagnostics:", glb_sel_mdl_id))
## [1] "All.X##rcv#glmnet OOB prediction diagnostics:"
glbObsOOB <- glb_get_predictions(df = glbObsOOB,
                                 mdl_id = glb_sel_mdl_id,
                                 rsp_var = glb_rsp_var)
# Display feature importance for the selected model (fresh table).
print(glb_featsimp_df <- myget_feats_importance(mdl = glb_sel_mdl, featsimp_df = NULL))
## All.X..rcv.glmnet.imp imp
## resX.mean.nexp 1.000000e+02 1.000000e+02
## resY.mean.nexp 1.000000e+02 1.000000e+02
## CosSmlBG.mad 8.400892e-33 8.400892e-33
## CosSmlBG.mean 2.711538e-33 2.711538e-33
## nImgs.nexp 2.280166e-33 2.280166e-33
## CorBG.mad 1.755783e-33 1.755783e-33
## lumB.mad.mean 1.592978e-33 1.592978e-33
## lumG.mad.mean 1.481575e-33 1.481575e-33
## lumR.mean.mean 1.185983e-33 1.185983e-33
## lumG.mean.mad 8.658961e-34 8.658961e-34
## CosSmlRB.mad 7.873026e-34 7.873026e-34
## lumR.mad.mean 7.512850e-34 7.512850e-34
## lumG.mad.mad 7.009418e-34 7.009418e-34
## CorBG.mean 6.900427e-34 6.900427e-34
## CosSmlGR.mad 6.364510e-34 6.364510e-34
## lumB.mean.mad 5.786306e-34 5.786306e-34
## CosSmlRB.mean 4.086201e-34 4.086201e-34
## resX.mean.log1p 3.978958e-34 3.978958e-34
## CosSmlGR.mean 3.589019e-34 3.589019e-34
## lumR.mad.mad 3.378553e-34 3.378553e-34
## lumB.mad.mad 2.551252e-34 2.551252e-34
## lumB.mean.mean 2.125566e-34 2.125566e-34
## CorRB.mad 1.970257e-34 1.970257e-34
## lumG.mean.mean 1.950337e-34 1.950337e-34
## resXY.mean.log1p 1.643194e-34 1.643194e-34
## lumR.mean.mad 1.499543e-34 1.499543e-34
## CorGR.mean 1.231812e-34 1.231812e-34
## CorRB.mean 8.909856e-35 8.909856e-35
## CorGR.mad 6.711498e-35 6.711498e-35
## resXY.mad.nexp 5.322190e-35 5.322190e-35
## resX.mad.nexp 4.285429e-35 4.285429e-35
## nImgs.log1p 4.237398e-35 4.237398e-35
## lumG.mad.mean.cut.fctr(0.23,0.37] 3.447082e-35 3.447082e-35
## resX.mad.log1p 2.308415e-35 2.308415e-35
## nImgs.cut.fctr(60,120] 2.015206e-35 2.015206e-35
## resY.mean.root2 1.426277e-35 1.426277e-35
## lumG.mad.mean.cut.fctr(0.21,0.22] 9.770024e-36 9.770024e-36
## lumG.mad.mean.cut.fctr(0.22,0.23] 6.783971e-36 6.783971e-36
## resY.mad.nexp 6.035620e-36 6.035620e-36
## nImgs.cut.fctr(32,60] 3.275190e-36 3.275190e-36
## nImgs.root2 2.909239e-36 2.909239e-36
## nImgs.cut.fctr(120,3e+03] 2.877231e-36 2.877231e-36
## resY.mean.log1p 1.743416e-36 1.743416e-36
## resY.mad.log1p 1.313536e-36 1.313536e-36
## resXY.mad.log1p 1.255068e-36 1.255068e-36
## resX.mad.root2 1.183537e-36 1.183537e-36
## resXY.mad.root2 6.955937e-37 6.955937e-37
## resY.mean 6.569297e-37 6.569297e-37
## resX.mad 5.561659e-37 5.561659e-37
## nImgs 8.386716e-38 8.386716e-38
## resX.mean 3.117560e-38 3.117560e-38
## resY.mad 1.243571e-38 1.243571e-38
## resXY.mean 2.950400e-39 2.950400e-39
## .pos 2.688441e-39 2.688441e-39
## resXY.mad 2.205180e-39 2.205180e-39
## resXY.mean.root2 1.492808e-39 1.492808e-39
## .rnorm 0.000000e+00 0.000000e+00
## resX.mean.root2 0.000000e+00 0.000000e+00
## resY.mad.root2 0.000000e+00 0.000000e+00
#mdl_id <-"RFE.X.glmnet"; glb_featsimp_df <- myget_feats_importance(glb_models_lst[[mdl_id]], glb_featsimp_df); glb_featsimp_df[, paste0(mdl_id, ".imp")] <- glb_featsimp_df$imp; print(glb_featsimp_df)
#print(head(sbst_featsimp_df <- subset(glb_featsimp_df, is.na(RFE.X.glmnet.imp) | (abs(RFE.X.YeoJohnson.glmnet.imp - RFE.X.glmnet.imp) > 0.0001), select=-imp)))
#print(orderBy(~ -cor.y.abs, subset(glb_feats_df, id %in% c(row.names(sbst_featsimp_df), "startprice.dcm1.is9", "D.weight.post.stop.sum"))))
# Used again in fit.data.training & predict.data.new chunks
# Diagnostic plots for a fitted model's predictions.
#   obs_df         : observations containing both actuals and the model's
#                    prediction columns
#   mdl_id         : id of the model whose predictions are plotted
#   prob_threshold : classification cutoff forwarded to the classification
#                    plot; NULL for regression
# Reads globals: glb_featsimp_df, glb_rsp_var, glb_is_regression,
# glb_is_classification, glbFeatsId. Used again in fit.data.training &
# predict.data.new chunks.
glb_analytics_diag_plots <- function(obs_df, mdl_id, prob_threshold=NULL) {
if (!is.null(featsimp_df <- glb_featsimp_df)) {
# Normalize importance row names into bare feature names:
# strip backticks, split "a:b" interactions, and collapse factor-level
# suffixes ("x.fctrLvl") back to the factor column ("x.fctr").
featsimp_df$feat <- gsub("`(.*?)`", "\\1", row.names(featsimp_df))
featsimp_df$feat.interact <- gsub("(.*?):(.*)", "\\2", featsimp_df$feat)
featsimp_df$feat <- gsub("(.*?):(.*)", "\\1", featsimp_df$feat)
featsimp_df$feat.interact <-
ifelse(featsimp_df$feat.interact == featsimp_df$feat,
NA, featsimp_df$feat.interact)
featsimp_df$feat <-
gsub("(.*?)\\.fctr(.*)", "\\1\\.fctr", featsimp_df$feat)
featsimp_df$feat.interact <-
gsub("(.*?)\\.fctr(.*)", "\\1\\.fctr", featsimp_df$feat.interact)
# Keep the max importance per (feat, interaction) pair, best first
featsimp_df <- orderBy(~ -imp.max,
summaryBy(imp ~ feat + feat.interact, data=featsimp_df,
FUN=max))
#rex_str=":(.*)"; txt_vctr=tail(featsimp_df$feat); ret_lst <- regexec(rex_str, txt_vctr); ret_lst <- regmatches(txt_vctr, ret_lst); ret_vctr <- sapply(1:length(ret_lst), function(pos_ix) ifelse(length(ret_lst[[pos_ix]]) > 0, ret_lst[[pos_ix]], "")); print(ret_vctr <- ret_vctr[ret_vctr != ""])
featsimp_df <- subset(featsimp_df, !is.na(imp.max))
# Cap the number of scatter plots at the 5 most important features
if (nrow(featsimp_df) > 5) {
warning("Limiting important feature scatter plots to 5 out of ",
nrow(featsimp_df))
featsimp_df <- head(featsimp_df, 5)
}
# if (!all(is.na(featsimp_df$feat.interact)))
# stop("not implemented yet")
rsp_var_out <- mygetPredictIds(glb_rsp_var, mdl_id)$value
# One scatter plot per retained feature: actual vs. predicted response
for (var in featsimp_df$feat) {
plot_df <- melt(obs_df, id.vars = var,
measure.vars = c(glb_rsp_var, rsp_var_out))
print(myplot_scatter(plot_df, var, "value", colorcol_name = "variable",
facet_colcol_name = "variable", jitter = TRUE) +
guides(color = FALSE))
}
}
# Prediction-vs-actual overview plot, keyed on the two top features
if (glb_is_regression) {
if (is.null(featsimp_df) || (nrow(featsimp_df) == 0))
warning("No important features in glb_fin_mdl") else
print(myplot_prediction_regression(df=obs_df,
feat_x=ifelse(nrow(featsimp_df) > 1, featsimp_df$feat[2],
".rownames"),
feat_y=featsimp_df$feat[1],
rsp_var=glb_rsp_var, rsp_var_out=rsp_var_out,
id_vars=glbFeatsId)
# + facet_wrap(reformulate(featsimp_df$feat[2])) # if [1 or 2] is a factor
# + geom_point(aes_string(color="<col_name>.fctr")) # to color the plot
)
}
if (glb_is_classification) {
if (is.null(featsimp_df) || (nrow(featsimp_df) == 0))
warning("No features in selected model are statistically important")
else print(myplot_prediction_classification(df = obs_df,
feat_x = ifelse(nrow(featsimp_df) > 1,
featsimp_df$feat[2], ".rownames"),
feat_y = featsimp_df$feat[1],
rsp_var = glb_rsp_var,
rsp_var_out = rsp_var_out,
id_vars = glbFeatsId,
prob_threshold = prob_threshold))
}
}
# Run OOB diagnostics; binomial classifiers additionally need the
# OOB-optimal probability threshold for the prediction plot.
if (glb_is_classification && glb_is_binomial) {
  glb_analytics_diag_plots(
    obs_df = glbObsOOB, mdl_id = glb_sel_mdl_id,
    prob_threshold = glb_models_df[glb_models_df$id == glb_sel_mdl_id,
                                   "opt.prob.threshold.OOB"])
} else {
  glb_analytics_diag_plots(obs_df = glbObsOOB, mdl_id = glb_sel_mdl_id)
}
## Warning in glb_analytics_diag_plots(obs_df = glbObsOOB, mdl_id =
## glb_sel_mdl_id, : Limiting important feature scatter plots to 5 out of 55
## [1] "Min/Max Boundaries: "
## business_id outdoor.fctr outdoor.fctr.All.X..rcv.glmnet.prob
## 1 1601 Y 0.4237897
## 2 1375 Y 0.4761159
## 3 455 N 0.8443050
## 4 2846 N 0.8955671
## outdoor.fctr.All.X..rcv.glmnet outdoor.fctr.All.X..rcv.glmnet.err
## 1 Y FALSE
## 2 Y FALSE
## 3 Y TRUE
## 4 Y TRUE
## outdoor.fctr.All.X..rcv.glmnet.err.abs
## 1 0.5762103
## 2 0.5238841
## 3 0.8443050
## 4 0.8955671
## outdoor.fctr.All.X..rcv.glmnet.is.acc
## 1 TRUE
## 2 TRUE
## 3 FALSE
## 4 FALSE
## outdoor.fctr.All.X..rcv.glmnet.accurate
## 1 TRUE
## 2 TRUE
## 3 FALSE
## 4 FALSE
## outdoor.fctr.All.X..rcv.glmnet.error .label
## 1 0.0000000 1601
## 2 0.0000000 1375
## 3 0.6443050 455
## 4 0.6955671 2846
## [1] "Inaccurate: "
## business_id outdoor.fctr outdoor.fctr.All.X..rcv.glmnet.prob
## 1 1490 Y 0.06796769
## 2 2336 Y 0.14891316
## 3 3848 Y 0.14964243
## 4 3748 Y 0.15427858
## 5 723 Y 0.16502650
## 6 993 Y 0.18151305
## outdoor.fctr.All.X..rcv.glmnet outdoor.fctr.All.X..rcv.glmnet.err
## 1 N TRUE
## 2 N TRUE
## 3 N TRUE
## 4 N TRUE
## 5 N TRUE
## 6 N TRUE
## outdoor.fctr.All.X..rcv.glmnet.err.abs
## 1 0.9320323
## 2 0.8510868
## 3 0.8503576
## 4 0.8457214
## 5 0.8349735
## 6 0.8184870
## outdoor.fctr.All.X..rcv.glmnet.is.acc
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## outdoor.fctr.All.X..rcv.glmnet.accurate
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## outdoor.fctr.All.X..rcv.glmnet.error
## 1 -0.13203231
## 2 -0.05108684
## 3 -0.05035757
## 4 -0.04572142
## 5 -0.03497350
## 6 -0.01848695
## business_id outdoor.fctr outdoor.fctr.All.X..rcv.glmnet.prob
## 65 3325 N 0.3161636
## 109 3826 N 0.3663002
## 225 1750 N 0.4800385
## 243 2664 N 0.4893079
## 287 63 N 0.5254733
## 333 2023 N 0.5676532
## outdoor.fctr.All.X..rcv.glmnet outdoor.fctr.All.X..rcv.glmnet.err
## 65 Y TRUE
## 109 Y TRUE
## 225 Y TRUE
## 243 Y TRUE
## 287 Y TRUE
## 333 Y TRUE
## outdoor.fctr.All.X..rcv.glmnet.err.abs
## 65 0.3161636
## 109 0.3663002
## 225 0.4800385
## 243 0.4893079
## 287 0.5254733
## 333 0.5676532
## outdoor.fctr.All.X..rcv.glmnet.is.acc
## 65 FALSE
## 109 FALSE
## 225 FALSE
## 243 FALSE
## 287 FALSE
## 333 FALSE
## outdoor.fctr.All.X..rcv.glmnet.accurate
## 65 FALSE
## 109 FALSE
## 225 FALSE
## 243 FALSE
## 287 FALSE
## 333 FALSE
## outdoor.fctr.All.X..rcv.glmnet.error
## 65 0.1161636
## 109 0.1663002
## 225 0.2800385
## 243 0.2893079
## 287 0.3254733
## 333 0.3676532
## business_id outdoor.fctr outdoor.fctr.All.X..rcv.glmnet.prob
## 475 1839 N 0.8147600
## 476 744 N 0.8197807
## 477 839 N 0.8352836
## 478 455 N 0.8443050
## 479 2296 N 0.8473961
## 480 2846 N 0.8955671
## outdoor.fctr.All.X..rcv.glmnet outdoor.fctr.All.X..rcv.glmnet.err
## 475 Y TRUE
## 476 Y TRUE
## 477 Y TRUE
## 478 Y TRUE
## 479 Y TRUE
## 480 Y TRUE
## outdoor.fctr.All.X..rcv.glmnet.err.abs
## 475 0.8147600
## 476 0.8197807
## 477 0.8352836
## 478 0.8443050
## 479 0.8473961
## 480 0.8955671
## outdoor.fctr.All.X..rcv.glmnet.is.acc
## 475 FALSE
## 476 FALSE
## 477 FALSE
## 478 FALSE
## 479 FALSE
## 480 FALSE
## outdoor.fctr.All.X..rcv.glmnet.accurate
## 475 FALSE
## 476 FALSE
## 477 FALSE
## 478 FALSE
## 479 FALSE
## 480 FALSE
## outdoor.fctr.All.X..rcv.glmnet.error
## 475 0.6147600
## 476 0.6197807
## 477 0.6352836
## 478 0.6443050
## 479 0.6473961
## 480 0.6955671
# Merge per-category prediction-error statistics (fit & OOB) into the
# category-level summary table, then display it sorted by mean abs error.
if (!is.null(glbFeatsCategory)) {
glbLvlCategory <- merge(glbLvlCategory,
myget_category_stats(obs_df = glbObsFit, mdl_id = glb_sel_mdl_id,
label = "fit"),
by = glbFeatsCategory, all = TRUE)
row.names(glbLvlCategory) <- glbLvlCategory[, glbFeatsCategory]
# NOTE(review): this second merge omits by=, so it joins on ALL shared
# columns (see the commented-out by= below) — confirm that is intended.
glbLvlCategory <- merge(glbLvlCategory,
myget_category_stats(obs_df = glbObsOOB, mdl_id = glb_sel_mdl_id,
label="OOB"),
#by=glbFeatsCategory, all=TRUE) glb_ctgry-df already contains .n.OOB ?
all = TRUE)
row.names(glbLvlCategory) <- glbLvlCategory[, glbFeatsCategory]
# Sort by OOB error when OOB metrics participate in model evaluation
if (any(grepl("OOB", glbMdlMetricsEval)))
print(orderBy(~-err.abs.OOB.mean, glbLvlCategory)) else
print(orderBy(~-err.abs.fit.mean, glbLvlCategory))
print(colSums(glbLvlCategory[, -grep(glbFeatsCategory, names(glbLvlCategory))]))
}
## lumG.mad.mean.cut.fctr .n.OOB .n.Fit .n.Tst .freqRatio.Fit
## (0.22,0.23] (0.22,0.23] 114 116 2192 0.1156530
## (0.21,0.22] (0.21,0.22] 107 108 2591 0.1076770
## (0.23,0.37] (0.23,0.37] 411 412 2416 0.4107677
## (0.07,0.21] (0.07,0.21] 365 367 2801 0.3659023
## .freqRatio.OOB .freqRatio.Tst err.abs.fit.sum err.abs.fit.mean
## (0.22,0.23] 0.1143430 0.2192 52.70722 0.4543726
## (0.21,0.22] 0.1073220 0.2591 46.81490 0.4334713
## (0.23,0.37] 0.4122367 0.2416 184.14207 0.4469468
## (0.07,0.21] 0.3660983 0.2801 163.40703 0.4452508
## .n.fit err.abs.OOB.sum err.abs.OOB.mean
## (0.22,0.23] 116 55.29970 0.4850851
## (0.21,0.22] 108 51.65088 0.4827185
## (0.23,0.37] 412 192.79373 0.4690845
## (0.07,0.21] 367 170.30356 0.4665851
## .n.OOB .n.Fit .n.Tst .freqRatio.Fit
## 997.000000 1003.000000 10000.000000 1.000000
## .freqRatio.OOB .freqRatio.Tst err.abs.fit.sum err.abs.fit.mean
## 1.000000 1.000000 447.071216 1.780041
## .n.fit err.abs.OOB.sum err.abs.OOB.mean
## 1003.000000 470.047879 1.903473
# Persist the OOB observations (id column plus every response-related
# column) for offline review; dots in the file-name stem become
# underscores before the "_OOBobs.csv" suffix is appended.
oobCols <- c(glbFeatsId,
             grep(glb_rsp_var, names(glbObsOOB), fixed = TRUE, value = TRUE))
oobPath <- paste0(gsub(".", "_", paste0(glbOut$pfx, glb_sel_mdl_id),
                       fixed = TRUE),
                  "_OOBobs.csv")
write.csv(glbObsOOB[, oobCols], oobPath, row.names = FALSE)
# Reset the fit.models_2 sub-chunk timer for teardown, then advance the
# overall fit.models chunk clock (minor increment).
fit.models_2_chunk_df <-
  myadd_chunk(NULL, "fit.models_2_bgn", label.minor = "teardown")
## label step_major step_minor label_minor bgn end elapsed
## 1 fit.models_2_bgn 1 0 teardown 311.741 NA NA
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models",
                             major.inc = FALSE)
## label step_major step_minor label_minor bgn end elapsed
## 18 fit.models 8 2 2 300.743 311.751 11.008
## 19 fit.models 8 3 3 311.751 NA NA
# if (sum(is.na(glbObsAll$D.P.http)) > 0)
# stop("fit.models_3: Why is this happening ?")
#stop(here"); glb2Sav()
# Copy model-generated columns (predictions, errors) from the Fit/OOB
# working frames back into the master glbObsTrn / glbObsAll frames,
# matching rows by the .lcn location marker. Mutates globals via <<-.
sync_glb_obs_df <- function() {
# Merge or cbind ?
for (col in setdiff(names(glbObsFit), names(glbObsTrn)))
glbObsTrn[glbObsTrn$.lcn == "Fit", col] <<- glbObsFit[, col]
for (col in setdiff(names(glbObsFit), names(glbObsAll)))
glbObsAll[glbObsAll$.lcn == "Fit", col] <<- glbObsFit[, col]
# NOTE(review): without braces this if() guards ONLY the next for-loop
# (OOB -> glbObsTrn); the OOB -> glbObsAll loop below runs
# unconditionally — confirm this scoping is intended.
if (all(is.na(glbObsNew[, glb_rsp_var])))
for (col in setdiff(names(glbObsOOB), names(glbObsTrn)))
glbObsTrn[glbObsTrn$.lcn == "OOB", col] <<- glbObsOOB[, col]
for (col in setdiff(names(glbObsOOB), names(glbObsAll)))
glbObsAll[glbObsAll$.lcn == "OOB", col] <<- glbObsOOB[, col]
}
sync_glb_obs_df()
# Sanity check: the new (test) frame should have no columns that are
# missing from glbObsAll (expects character(0)).
print(setdiff(names(glbObsNew), names(glbObsAll)))
## character(0)
# Advance the analytics Petri-net workflow simulation: append
# "model.selected" to the available objects and replay the transitions.
replay.petrisim(pn=glb_analytics_pn,
replay.trans=(glb_analytics_avl_objs <- c(glb_analytics_avl_objs,
"model.selected")), flip_coord=TRUE)
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
## 2.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction data.new.prediction firing: model.selected
## 3.0000 3 0 2 1 0
# Start the fit.data.training chunk timer (major increment)
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.data.training", major.inc = TRUE)
## label step_major step_minor label_minor bgn end
## 19 fit.models 8 3 3 311.751 316.077
## 20 fit.data.training 9 0 0 316.077 NA
## elapsed
## 19 4.326
## 20 NA
# 9.0: fit data training
#load(paste0(glb_inp_pfx, "dsk.RData"))
# Fit the "Final" model on the full training set (fit + OOB observations).
# Three paths:
#   1. glb_fin_mdl_id is already fitted            -> reuse that model
#   2. the new data carries labels (no holdout
#      needed for generalization estimates)        -> alias the selected model
#   3. otherwise                                   -> re-train the selected
#      model's family (plus any glbMdlFamilies[["Final"]] entries) on all
#      training observations, first re-running RFE and/or refitting
#      ensemble component models when the selected model depends on them.
if (!is.null(glb_fin_mdl_id) && (glb_fin_mdl_id %in% names(glb_models_lst))) {
    warning("Final model same as user selected model")
    glb_fin_mdl <- glb_models_lst[[glb_fin_mdl_id]]
} else
    # if (nrow(glbObsFit) + length(glbObsFitOutliers) == nrow(glbObsTrn))
    if (!all(is.na(glbObsNew[, glb_rsp_var]))) {
        warning("Final model same as glb_sel_mdl_id")
        glb_fin_mdl_id <- paste0("Final.", glb_sel_mdl_id)
        glb_fin_mdl <- glb_sel_mdl
        glb_models_lst[[glb_fin_mdl_id]] <- glb_fin_mdl
    } else {
    # Re-run recursive feature elimination on the full training set when any
    # configured model family uses RFE-selected features.
    # FIX: grepl() over names(glbMdlFamilies) returns a logical VECTOR;
    # if() requires a scalar condition (length > 1 errors as of R 4.2),
    # so wrap it in any().
    if (any(grepl("RFE\\.X", names(glbMdlFamilies)))) {
        indep_vars <- myadjust_interaction_feats(subset(glb_feats_df,
            !nzv & (exclude.as.feat != 1))[, "id"])
        rfe_trn_results <-
            myrun_rfe(glbObsTrn, indep_vars, glbRFESizes[["Final"]])
        # Report any disagreement between the feature sets selected on the
        # full training set vs. the fit-only set.
        if (!isTRUE(all.equal(sort(predictors(rfe_trn_results)),
                              sort(predictors(rfe_fit_results))))) {
            print("Diffs predictors(rfe_trn_results) vs. predictors(rfe_fit_results):")
            print(setdiff(predictors(rfe_trn_results), predictors(rfe_fit_results)))
            print("Diffs predictors(rfe_fit_results) vs. predictors(rfe_trn_results):")
            print(setdiff(predictors(rfe_fit_results), predictors(rfe_trn_results)))
        }
    }
    # }
    if (grepl("Ensemble", glb_sel_mdl_id)) {
        # Find which component models matter (importance > 5)
        mdlimp_df <- subset(myget_feats_importance(glb_sel_mdl), imp > 5)
        # Refit each relevant component on glbObsTrn; component ids are
        # recovered by stripping the predict-prefix and ".prob" suffix
        # from the importance row names.
        for (mdl_id in gsub(".prob", "",
            gsub(mygetPredictIds(glb_rsp_var)$value, "", row.names(mdlimp_df),
                 fixed = TRUE),
            fixed = TRUE)) {
            mdl_id_components <- unlist(strsplit(mdl_id, "[.]"))
            mdlIdPfx <- paste0(c(head(mdl_id_components, -1), "Train"),
                               collapse = ".")
            if (grepl("RFE\\.X\\.", mdlIdPfx)) {
                mdlIndepVars <- myadjust_interaction_feats(myextract_actual_feats(
                    predictors(rfe_trn_results)))
            } else {
                mdlIndepVars <- trim(unlist(
                    strsplit(glb_models_df[glb_models_df$id == mdl_id, "feats"],
                             "[,]")))
            }
            ret_lst <-
                myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
                    id.prefix = mdlIdPfx,
                    type = glb_model_type, tune.df = glbMdlTuneParams,
                    trainControl.method = "repeatedcv",
                    trainControl.number = glb_rcv_n_folds,
                    trainControl.repeats = glb_rcv_n_repeats,
                    trainControl.classProbs = glb_is_classification,
                    trainControl.summaryFunction = glbMdlMetricSummaryFn,
                    train.metric = glbMdlMetricSummary,
                    train.maximize = glbMdlMetricMaximize,
                    train.method = tail(mdl_id_components, 1))),
                    indep_vars = mdlIndepVars,
                    rsp_var = glb_rsp_var,
                    fit_df = glbObsTrn, OOB_df = NULL)
            # Attach the freshly fitted component's predictions to both the
            # training and the new frames (thresholded with the component's
            # OOB-optimal cutoff).
            glbObsTrn <- glb_get_predictions(df = glbObsTrn,
                mdl_id = tail(glb_models_df$id, 1),
                rsp_var = glb_rsp_var,
                prob_threshold_def =
                    subset(glb_models_df, id == mdl_id)$opt.prob.threshold.OOB)
            glbObsNew <- glb_get_predictions(df = glbObsNew,
                mdl_id = tail(glb_models_df$id, 1),
                rsp_var = glb_rsp_var,
                prob_threshold_def =
                    subset(glb_models_df, id == mdl_id)$opt.prob.threshold.OOB)
        }
    }
    # "Final" model
    if ((model_method <- glb_sel_mdl$method) == "custom")
        # get actual method from the mdl_id
        model_method <- tail(unlist(strsplit(glb_sel_mdl_id, "[.]")), 1)
    # Choose the predictors for the final fit, depending on whether the
    # selected model is an ensemble, an RFE model, or a plain fit.
    if (grepl("Ensemble", glb_sel_mdl_id)) {
        # Find which models are relevant
        mdlimp_df <- subset(myget_feats_importance(glb_sel_mdl), imp > 5)
        # Component predictions were refitted with a ".Train" id segment;
        # rewrite the predictor names accordingly.
        if (glb_is_classification && glb_is_binomial) {
            indep_vars_vctr <- gsub("(.*)\\.(.*)\\.prob",
                                    "\\1\\.Train\\.\\2\\.prob",
                                    row.names(mdlimp_df))
        } else {
            indep_vars_vctr <- gsub("(.*)\\.(.*)", "\\1\\.Train\\.\\2",
                                    row.names(mdlimp_df))
        }
    } else if (grepl("RFE.X", glb_sel_mdl_id, fixed = TRUE)) {
        indep_vars_vctr <- myextract_actual_feats(predictors(rfe_trn_results))
    } else {
        indep_vars_vctr <-
            trim(unlist(strsplit(glb_models_df[glb_models_df$id ==
                                               glb_sel_mdl_id, "feats"],
                                 "[,]")))
    }
    # Carry forward any pre-processing step embedded in the selected
    # model's id (dots are regex-escaped before matching).
    if (!is.null(glb_preproc_methods) &&
        ((match_pos <- regexpr(gsub(".", "\\.",
                                    paste(glb_preproc_methods, collapse = "|"),
                                    fixed = TRUE), glb_sel_mdl_id)) != -1)) {
        ths_preProcess <- str_sub(glb_sel_mdl_id, match_pos,
                                  match_pos + attr(match_pos, "match.length") - 1)
    } else {
        ths_preProcess <- NULL
    }
    mdl_id_pfx <- ifelse(grepl("Ensemble", glb_sel_mdl_id),
                         "Final.Ensemble", "Final")
    # Drop user-designated outlier observations before the final fit
    trnobs_df <- glbObsTrn
    if (!is.null(glbObsTrnOutliers[[mdl_id_pfx]])) {
        trnobs_df <- glbObsTrn[!(glbObsTrn[, glbFeatsId] %in%
                                 glbObsTrnOutliers[[mdl_id_pfx]]), ]
        print(sprintf("Outliers removed: %d", nrow(glbObsTrn) - nrow(trnobs_df)))
        print(setdiff(glbObsTrn[, glbFeatsId], trnobs_df[, glbFeatsId]))
    }
    # Force fitting of Final.glm to identify outliers
    method_vctr <- unique(c(myparseMdlId(glb_sel_mdl_id)$alg,
                            glbMdlFamilies[["Final"]]))
    for (method in method_vctr) {
        #source("caret_nominalTrainWorkflow.R")
        # glmnet requires at least 2 indep vars
        if ((length(indep_vars_vctr) == 1) && (method %in% "glmnet"))
            next
        ret_lst <-
            myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
                id.prefix = mdl_id_pfx,
                type = glb_model_type, trainControl.method = "repeatedcv",
                trainControl.number = glb_rcv_n_folds,
                trainControl.repeats = glb_rcv_n_repeats,
                trainControl.classProbs = glb_is_classification,
                trainControl.summaryFunction = glbMdlMetricSummaryFn,
                trainControl.allowParallel = glbMdlAllowParallel,
                train.metric = glbMdlMetricSummary,
                train.maximize = glbMdlMetricMaximize,
                train.method = method,
                train.preProcess = ths_preProcess)),
                indep_vars = indep_vars_vctr, rsp_var = glb_rsp_var,
                fit_df = trnobs_df, OOB_df = NULL)
        # Keep the most recent non-glm fit as the final model (glm is fitted
        # only for outlier diagnostics when other methods are requested too).
        if ((length(method_vctr) == 1) || (method != "glm")) {
            glb_fin_mdl <- glb_models_lst[[length(glb_models_lst)]]
            glb_fin_mdl_id <- glb_models_df[length(glb_models_lst), "id"]
        }
    }
    }
## [1] "myfit_mdl: enter: 0.000000 secs"
## [1] "fitting model: Final##rcv#glmnet"
## [1] " indep_vars: lumG.mad.mean.cut.fctr,lumG.mad.mean,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mean.mad,nImgs.log1p,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,CosSmlBG.mean,.pos,resX.mad.log1p,resX.mad.root2,CosSmlGR.mean,CosSmlRB.mean,resX.mad,lumB.mad.mean,CorBG.mean,lumR.mean.mad,resXY.mad.nexp,nImgs.root2,lumR.mad.mad,resY.mean.log1p,resY.mean.root2,resY.mean,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,CorRB.mean,resXY.mad.root2,resXY.mad,resX.mad.nexp,resXY.mad.log1p,nImgs,resX.mean.log1p,resX.mean.root2,resX.mean,resX.mean.nexp,resY.mean.nexp,lumB.mean.mean,CosSmlBG.mad,lumR.mean.mean"
## [1] "myfit_mdl: setup complete: 0.808000 secs"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.775, lambda = 0.0249 on full training set
## [1] "myfit_mdl: train complete: 10.995000 secs"
## Warning in myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst
## = list(id.prefix = mdl_id_pfx, : model's bestTune found at an extreme of
## tuneGrid for parameter: lambda
## Length Class Mode
## a0 84 -none- numeric
## beta 4956 dgCMatrix S4
## df 84 -none- numeric
## dim 2 -none- numeric
## lambda 84 -none- numeric
## dev.ratio 84 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 59 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept) CorRB.mad
## 1.08415521 1.71224360
## lumB.mean.mad lumG.mad.mad
## 0.08746083 0.63702448
## lumG.mad.mean lumG.mad.mean.cut.fctr(0.23,0.37]
## 2.28482595 0.18663950
## lumG.mean.mad lumG.mean.mean
## 0.97443904 1.51234926
## lumR.mean.mean nImgs.log1p
## -5.08538142 0.01955754
## [1] "max lambda < lambdaOpt:"
## (Intercept) CorRB.mad
## 1.06098887 2.29852371
## lumB.mean.mad lumG.mad.mad
## 0.24721229 0.67427358
## lumG.mad.mean lumG.mad.mean.cut.fctr(0.23,0.37]
## 2.47274203 0.19477939
## lumG.mean.mad lumG.mean.mean
## 1.12056717 1.81809413
## lumR.mean.mean nImgs.log1p
## -5.54321213 0.02786617
## [1] "myfit_mdl: train diagnostics complete: 11.614000 secs"
## Prediction
## Reference N Y
## N 92 905
## Y 36 967
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.295000e-01 5.653189e-02 5.073454e-01 5.515680e-01 5.015000e-01
## AccuracyPValue McnemarPValue
## 6.519607e-03 3.871560e-176
## [1] "myfit_mdl: predict complete: 13.819000 secs"
## id
## 1 Final##rcv#glmnet
## feats
## 1 lumG.mad.mean.cut.fctr,lumG.mad.mean,lumG.mad.mad,nImgs.cut.fctr,lumG.mean.mean,lumG.mean.mad,lumB.mean.mad,nImgs.log1p,lumB.mad.mad,CorRB.mad,lumR.mad.mean,CorGR.mad,CosSmlBG.mean,.pos,resX.mad.log1p,resX.mad.root2,CosSmlGR.mean,CosSmlRB.mean,resX.mad,lumB.mad.mean,CorBG.mean,lumR.mean.mad,resXY.mad.nexp,nImgs.root2,lumR.mad.mad,resY.mean.log1p,resY.mean.root2,resY.mean,resY.mad.nexp,resY.mad,CorGR.mean,CorBG.mad,resY.mad.root2,resY.mad.log1p,nImgs.nexp,CosSmlGR.mad,CosSmlRB.mad,resXY.mean.log1p,resXY.mean.root2,.rnorm,resXY.mean,CorRB.mean,resXY.mad.root2,resXY.mad,resX.mad.nexp,resXY.mad.log1p,nImgs,resX.mean.log1p,resX.mean.root2,resX.mean,resX.mean.nexp,resY.mean.nexp,lumB.mean.mean,CosSmlBG.mad,lumR.mean.mean
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 10.16 0.754
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.6014019 0.5687061 0.6340977 0.6407848
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.6726957 0.5921722
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.5073454 0.551568 0.1841452
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.02570656 0.05140304
## [1] "myfit_mdl: exit: 13.836000 secs"
# Clean up the transient fit result and advance the chunk timer.
rm(ret_lst)
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.data.training", major.inc=FALSE)
## label step_major step_minor label_minor bgn end
## 20 fit.data.training 9 0 0 316.077 330.427
## 21 fit.data.training 9 1 1 330.427 NA
## elapsed
## 20 14.35
## 21 NA
#stop(here"); glb2Sav()
# Classification threshold for the final predictions: reuse the selected
# model's OOB-optimal cutoff (NULL for regression / multinomial).
if (glb_is_classification && glb_is_binomial)
prob_threshold <- glb_models_df[glb_models_df$id == glb_sel_mdl_id,
"opt.prob.threshold.OOB"] else
prob_threshold <- NULL
if (grepl("Ensemble", glb_fin_mdl_id)) {
# Get predictions for each model in ensemble; Outliers that have been moved to OOB might not have been predicted yet
# Component ids are recovered from the ensemble's "feats" string by
# stripping the ".prob" suffix and the (regex-escaped) predict-prefix.
mdlEnsembleComps <- unlist(str_split(subset(glb_models_df,
id == glb_fin_mdl_id)$feats, ","))
if (glb_is_classification && glb_is_binomial)
mdlEnsembleComps <- gsub("\\.prob$", "", mdlEnsembleComps)
mdlEnsembleComps <- gsub(paste0("^",
gsub(".", "\\.", mygetPredictIds(glb_rsp_var)$value, fixed = TRUE)),
"", mdlEnsembleComps)
for (mdl_id in mdlEnsembleComps) {
glbObsTrn <- glb_get_predictions(df = glbObsTrn, mdl_id = mdl_id,
rsp_var = glb_rsp_var,
prob_threshold_def = prob_threshold)
glbObsNew <- glb_get_predictions(df = glbObsNew, mdl_id = mdl_id,
rsp_var = glb_rsp_var,
prob_threshold_def = prob_threshold)
}
}
# Attach the final model's predictions to the full training frame.
glbObsTrn <- glb_get_predictions(df = glbObsTrn, mdl_id = glb_fin_mdl_id,
rsp_var = glb_rsp_var,
prob_threshold_def = prob_threshold)
## Warning in glb_get_predictions(df = glbObsTrn, mdl_id = glb_fin_mdl_id, :
## Using default probability threshold: 0.2
# Append the final model's importances to the existing importance table.
glb_featsimp_df <- myget_feats_importance(mdl=glb_fin_mdl,
featsimp_df=glb_featsimp_df)
#glb_featsimp_df[, paste0(glb_fin_mdl_id, ".imp")] <- glb_featsimp_df$imp
print(glb_featsimp_df)
## All.X..rcv.glmnet.imp
## lumR.mean.mean 1.185983e-33
## lumG.mad.mean 1.481575e-33
## CorRB.mad 1.970257e-34
## lumG.mean.mean 1.950337e-34
## lumG.mean.mad 8.658961e-34
## lumG.mad.mad 7.009418e-34
## lumB.mean.mad 5.786306e-34
## lumG.mad.mean.cut.fctr(0.23,0.37] 3.447082e-35
## nImgs.log1p 4.237398e-35
## .pos 2.688441e-39
## .rnorm 0.000000e+00
## CorBG.mad 1.755783e-33
## CorBG.mean 6.900427e-34
## CorGR.mad 6.711498e-35
## CorGR.mean 1.231812e-34
## CorRB.mean 8.909856e-35
## CosSmlBG.mad 8.400892e-33
## CosSmlBG.mean 2.711538e-33
## CosSmlGR.mad 6.364510e-34
## CosSmlGR.mean 3.589019e-34
## CosSmlRB.mad 7.873026e-34
## CosSmlRB.mean 4.086201e-34
## lumB.mad.mad 2.551252e-34
## lumB.mad.mean 1.592978e-33
## lumB.mean.mean 2.125566e-34
## lumG.mad.mean.cut.fctr(0.21,0.22] 9.770024e-36
## lumG.mad.mean.cut.fctr(0.22,0.23] 6.783971e-36
## lumR.mad.mad 3.378553e-34
## lumR.mad.mean 7.512850e-34
## lumR.mean.mad 1.499543e-34
## nImgs 8.386716e-38
## nImgs.cut.fctr(120,3e+03] 2.877231e-36
## nImgs.cut.fctr(32,60] 3.275190e-36
## nImgs.cut.fctr(60,120] 2.015206e-35
## nImgs.nexp 2.280166e-33
## nImgs.root2 2.909239e-36
## resX.mad 5.561659e-37
## resX.mad.log1p 2.308415e-35
## resX.mad.nexp 4.285429e-35
## resX.mad.root2 1.183537e-36
## resX.mean 3.117560e-38
## resX.mean.log1p 3.978958e-34
## resX.mean.nexp 1.000000e+02
## resX.mean.root2 0.000000e+00
## resXY.mad 2.205180e-39
## resXY.mad.log1p 1.255068e-36
## resXY.mad.nexp 5.322190e-35
## resXY.mad.root2 6.955937e-37
## resXY.mean 2.950400e-39
## resXY.mean.log1p 1.643194e-34
## resXY.mean.root2 1.492808e-39
## resY.mad 1.243571e-38
## resY.mad.log1p 1.313536e-36
## resY.mad.nexp 6.035620e-36
## resY.mad.root2 0.000000e+00
## resY.mean 6.569297e-37
## resY.mean.log1p 1.743416e-36
## resY.mean.nexp 1.000000e+02
## resY.mean.root2 1.426277e-35
## Final..rcv.glmnet.imp imp
## lumR.mean.mean 100.0000000 100.0000000
## lumG.mad.mean 44.6692489 44.6692489
## CorRB.mad 39.9886662 39.9886662
## lumG.mean.mean 32.2189583 32.2189583
## lumG.mean.mad 20.0155274 20.0155274
## lumG.mad.mad 12.2326513 12.2326513
## lumB.mean.mad 3.9406574 3.9406574
## lumG.mad.mean.cut.fctr(0.23,0.37] 3.5434439 3.5434439
## nImgs.log1p 0.4803292 0.4803292
## .pos 0.0000000 0.0000000
## .rnorm 0.0000000 0.0000000
## CorBG.mad 0.0000000 0.0000000
## CorBG.mean 0.0000000 0.0000000
## CorGR.mad 0.0000000 0.0000000
## CorGR.mean 0.0000000 0.0000000
## CorRB.mean 0.0000000 0.0000000
## CosSmlBG.mad 0.0000000 0.0000000
## CosSmlBG.mean 0.0000000 0.0000000
## CosSmlGR.mad 0.0000000 0.0000000
## CosSmlGR.mean 0.0000000 0.0000000
## CosSmlRB.mad 0.0000000 0.0000000
## CosSmlRB.mean 0.0000000 0.0000000
## lumB.mad.mad 0.0000000 0.0000000
## lumB.mad.mean 0.0000000 0.0000000
## lumB.mean.mean 0.0000000 0.0000000
## lumG.mad.mean.cut.fctr(0.21,0.22] 0.0000000 0.0000000
## lumG.mad.mean.cut.fctr(0.22,0.23] 0.0000000 0.0000000
## lumR.mad.mad 0.0000000 0.0000000
## lumR.mad.mean 0.0000000 0.0000000
## lumR.mean.mad 0.0000000 0.0000000
## nImgs 0.0000000 0.0000000
## nImgs.cut.fctr(120,3e+03] 0.0000000 0.0000000
## nImgs.cut.fctr(32,60] 0.0000000 0.0000000
## nImgs.cut.fctr(60,120] 0.0000000 0.0000000
## nImgs.nexp 0.0000000 0.0000000
## nImgs.root2 0.0000000 0.0000000
## resX.mad 0.0000000 0.0000000
## resX.mad.log1p 0.0000000 0.0000000
## resX.mad.nexp 0.0000000 0.0000000
## resX.mad.root2 0.0000000 0.0000000
## resX.mean 0.0000000 0.0000000
## resX.mean.log1p 0.0000000 0.0000000
## resX.mean.nexp 0.0000000 0.0000000
## resX.mean.root2 0.0000000 0.0000000
## resXY.mad 0.0000000 0.0000000
## resXY.mad.log1p 0.0000000 0.0000000
## resXY.mad.nexp 0.0000000 0.0000000
## resXY.mad.root2 0.0000000 0.0000000
## resXY.mean 0.0000000 0.0000000
## resXY.mean.log1p 0.0000000 0.0000000
## resXY.mean.root2 0.0000000 0.0000000
## resY.mad 0.0000000 0.0000000
## resY.mad.log1p 0.0000000 0.0000000
## resY.mad.nexp 0.0000000 0.0000000
## resY.mad.root2 0.0000000 0.0000000
## resY.mean 0.0000000 0.0000000
## resY.mean.log1p 0.0000000 0.0000000
## resY.mean.nexp 0.0000000 0.0000000
## resY.mean.root2 0.0000000 0.0000000
# Diagnostic plots for the final model on the training data; for binary
# classification pass the selected model's OOB-optimized probability threshold
# so the plotted predicted classes reflect the tuned cutoff.
if (glb_is_classification && glb_is_binomial)
glb_analytics_diag_plots(obs_df=glbObsTrn, mdl_id=glb_fin_mdl_id,
prob_threshold=glb_models_df[glb_models_df$id == glb_sel_mdl_id,
"opt.prob.threshold.OOB"]) else
glb_analytics_diag_plots(obs_df=glbObsTrn, mdl_id=glb_fin_mdl_id)
## Warning in glb_analytics_diag_plots(obs_df = glbObsTrn, mdl_id =
## glb_fin_mdl_id, : Limiting important feature scatter plots to 5 out of 55
## [1] "Min/Max Boundaries: "
## business_id outdoor.fctr outdoor.fctr.All.X..rcv.glmnet.prob
## 1 723 Y NA
## 2 917 Y 0.9635408
## 3 629 N NA
## 4 1291 N 0.1732483
## outdoor.fctr.All.X..rcv.glmnet outdoor.fctr.All.X..rcv.glmnet.err
## 1 <NA> NA
## 2 Y FALSE
## 3 <NA> NA
## 4 N FALSE
## outdoor.fctr.All.X..rcv.glmnet.err.abs
## 1 NA
## 2 0.03645924
## 3 NA
## 4 0.17324831
## outdoor.fctr.All.X..rcv.glmnet.is.acc
## 1 NA
## 2 TRUE
## 3 NA
## 4 TRUE
## outdoor.fctr.Final..rcv.glmnet.prob outdoor.fctr.Final..rcv.glmnet
## 1 0.4091560 Y
## 2 0.7858661 Y
## 3 0.3794830 Y
## 4 0.5701494 Y
## outdoor.fctr.Final..rcv.glmnet.err
## 1 FALSE
## 2 FALSE
## 3 TRUE
## 4 TRUE
## outdoor.fctr.Final..rcv.glmnet.err.abs
## 1 0.5908440
## 2 0.2141339
## 3 0.3794830
## 4 0.5701494
## outdoor.fctr.Final..rcv.glmnet.is.acc
## 1 TRUE
## 2 TRUE
## 3 FALSE
## 4 FALSE
## outdoor.fctr.Final..rcv.glmnet.accurate
## 1 TRUE
## 2 TRUE
## 3 FALSE
## 4 FALSE
## outdoor.fctr.Final..rcv.glmnet.error .label
## 1 0.0000000 723
## 2 0.0000000 917
## 3 0.1794830 629
## 4 0.3701494 1291
## [1] "Inaccurate: "
## business_id outdoor.fctr outdoor.fctr.All.X..rcv.glmnet.prob
## 1 2185 N NA
## 2 814 N 0.05634482
## 3 202 N NA
## 4 1634 N NA
## 5 922 N 0.13366474
## 6 87 N 0.08169804
## outdoor.fctr.All.X..rcv.glmnet outdoor.fctr.All.X..rcv.glmnet.err
## 1 <NA> NA
## 2 N FALSE
## 3 <NA> NA
## 4 <NA> NA
## 5 N FALSE
## 6 N FALSE
## outdoor.fctr.All.X..rcv.glmnet.err.abs
## 1 NA
## 2 0.05634482
## 3 NA
## 4 NA
## 5 0.13366474
## 6 0.08169804
## outdoor.fctr.All.X..rcv.glmnet.is.acc
## 1 NA
## 2 TRUE
## 3 NA
## 4 NA
## 5 TRUE
## 6 TRUE
## outdoor.fctr.Final..rcv.glmnet.prob outdoor.fctr.Final..rcv.glmnet
## 1 0.2722718 Y
## 2 0.2859921 Y
## 3 0.2953479 Y
## 4 0.3042856 Y
## 5 0.3053582 Y
## 6 0.3113133 Y
## outdoor.fctr.Final..rcv.glmnet.err
## 1 TRUE
## 2 TRUE
## 3 TRUE
## 4 TRUE
## 5 TRUE
## 6 TRUE
## outdoor.fctr.Final..rcv.glmnet.err.abs
## 1 0.2722718
## 2 0.2859921
## 3 0.2953479
## 4 0.3042856
## 5 0.3053582
## 6 0.3113133
## outdoor.fctr.Final..rcv.glmnet.is.acc
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## outdoor.fctr.Final..rcv.glmnet.accurate
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## outdoor.fctr.Final..rcv.glmnet.error
## 1 0.07227178
## 2 0.08599208
## 3 0.09534792
## 4 0.10428556
## 5 0.10535824
## 6 0.11131326
## business_id outdoor.fctr outdoor.fctr.All.X..rcv.glmnet.prob
## 97 1639 N NA
## 231 23 N NA
## 266 2305 N NA
## 267 2170 N NA
## 440 1056 N 0.4935066
## 732 3999 N NA
## outdoor.fctr.All.X..rcv.glmnet outdoor.fctr.All.X..rcv.glmnet.err
## 97 <NA> NA
## 231 <NA> NA
## 266 <NA> NA
## 267 <NA> NA
## 440 Y TRUE
## 732 <NA> NA
## outdoor.fctr.All.X..rcv.glmnet.err.abs
## 97 NA
## 231 NA
## 266 NA
## 267 NA
## 440 0.4935066
## 732 NA
## outdoor.fctr.All.X..rcv.glmnet.is.acc
## 97 NA
## 231 NA
## 266 NA
## 267 NA
## 440 FALSE
## 732 NA
## outdoor.fctr.Final..rcv.glmnet.prob outdoor.fctr.Final..rcv.glmnet
## 97 0.4024141 Y
## 231 0.4417346 Y
## 266 0.4499984 Y
## 267 0.4500352 Y
## 440 0.4816519 Y
## 732 0.5271487 Y
## outdoor.fctr.Final..rcv.glmnet.err
## 97 TRUE
## 231 TRUE
## 266 TRUE
## 267 TRUE
## 440 TRUE
## 732 TRUE
## outdoor.fctr.Final..rcv.glmnet.err.abs
## 97 0.4024141
## 231 0.4417346
## 266 0.4499984
## 267 0.4500352
## 440 0.4816519
## 732 0.5271487
## outdoor.fctr.Final..rcv.glmnet.is.acc
## 97 FALSE
## 231 FALSE
## 266 FALSE
## 267 FALSE
## 440 FALSE
## 732 FALSE
## outdoor.fctr.Final..rcv.glmnet.accurate
## 97 FALSE
## 231 FALSE
## 266 FALSE
## 267 FALSE
## 440 FALSE
## 732 FALSE
## outdoor.fctr.Final..rcv.glmnet.error
## 97 0.2024141
## 231 0.2417346
## 266 0.2499984
## 267 0.2500352
## 440 0.2816519
## 732 0.3271487
## business_id outdoor.fctr outdoor.fctr.All.X..rcv.glmnet.prob
## 992 3580 N 0.8621914
## 993 3945 N 0.7987877
## 994 2562 N NA
## 995 3810 N 0.8712620
## 996 2296 N NA
## 997 2121 N 0.8513290
## outdoor.fctr.All.X..rcv.glmnet outdoor.fctr.All.X..rcv.glmnet.err
## 992 Y TRUE
## 993 Y TRUE
## 994 <NA> NA
## 995 Y TRUE
## 996 <NA> NA
## 997 Y TRUE
## outdoor.fctr.All.X..rcv.glmnet.err.abs
## 992 0.8621914
## 993 0.7987877
## 994 NA
## 995 0.8712620
## 996 NA
## 997 0.8513290
## outdoor.fctr.All.X..rcv.glmnet.is.acc
## 992 FALSE
## 993 FALSE
## 994 NA
## 995 FALSE
## 996 NA
## 997 FALSE
## outdoor.fctr.Final..rcv.glmnet.prob outdoor.fctr.Final..rcv.glmnet
## 992 0.6363084 Y
## 993 0.6501289 Y
## 994 0.6509945 Y
## 995 0.6535319 Y
## 996 0.6757787 Y
## 997 0.7013368 Y
## outdoor.fctr.Final..rcv.glmnet.err
## 992 TRUE
## 993 TRUE
## 994 TRUE
## 995 TRUE
## 996 TRUE
## 997 TRUE
## outdoor.fctr.Final..rcv.glmnet.err.abs
## 992 0.6363084
## 993 0.6501289
## 994 0.6509945
## 995 0.6535319
## 996 0.6757787
## 997 0.7013368
## outdoor.fctr.Final..rcv.glmnet.is.acc
## 992 FALSE
## 993 FALSE
## 994 FALSE
## 995 FALSE
## 996 FALSE
## 997 FALSE
## outdoor.fctr.Final..rcv.glmnet.accurate
## 992 FALSE
## 993 FALSE
## 994 FALSE
## 995 FALSE
## 996 FALSE
## 997 FALSE
## outdoor.fctr.Final..rcv.glmnet.error
## 992 0.4363084
## 993 0.4501289
## 994 0.4509945
## 995 0.4535319
## 996 0.4757787
## 997 0.5013368
# Collect the ids of every feature that received a non-NA importance score in
# any literal "*.imp" column of glb_feats_df (union across importance columns).
imp_cols <- grep(".imp", names(glb_feats_df), fixed = TRUE, value = TRUE)
dsp_feats_vctr <- Reduce(
    function(feats, col)
        union(feats, glb_feats_df[!is.na(glb_feats_df[, col]), "id"]),
    imp_cols, c(NULL))
# print(glbObsTrn[glbObsTrn$UniqueID %in% FN_OOB_ids,
# grep(glb_rsp_var, names(glbObsTrn), value=TRUE)])
# Columns created on glbObsTrn (the final-model prediction columns, per the
# output below) that are not yet present in the combined dataset glbObsAll:
print(setdiff(names(glbObsTrn), names(glbObsAll)))
## [1] "outdoor.fctr.Final..rcv.glmnet.prob"
## [2] "outdoor.fctr.Final..rcv.glmnet"
## [3] "outdoor.fctr.Final..rcv.glmnet.err"
## [4] "outdoor.fctr.Final..rcv.glmnet.err.abs"
## [5] "outdoor.fctr.Final..rcv.glmnet.is.acc"
# Copy each missing column into the "Train" rows of glbObsAll.
# NOTE(review): positional assignment — assumes glbObsTrn row order matches the
# .src == "Train" subset of glbObsAll; confirm upstream ordering is preserved.
for (col in setdiff(names(glbObsTrn), names(glbObsAll)))
# Merge or cbind ?
glbObsAll[glbObsAll$.src == "Train", col] <- glbObsTrn[, col]
# Sanity checks: the Fit, OOB, and New subsets should carry no columns missing
# from glbObsAll (character(0) expected, as echoed below).
print(setdiff(names(glbObsFit), names(glbObsAll)))
## character(0)
print(setdiff(names(glbObsOOB), names(glbObsAll)))
## character(0)
# Copy any OOB-only columns into the "OOB" rows of glbObsAll (no-op here since
# the setdiff above is empty).
# NOTE(review): positional assignment — assumes glbObsOOB row order matches the
# .lcn == "OOB" subset of glbObsAll.
for (col in setdiff(names(glbObsOOB), names(glbObsAll)))
# Merge or cbind ?
glbObsAll[glbObsAll$.lcn == "OOB", col] <- glbObsOOB[, col]
print(setdiff(names(glbObsNew), names(glbObsAll)))
## character(0)
#glb2Sav(); all.equal(savObsAll, glbObsAll); all.equal(sav_models_lst, glb_models_lst)
#load(file = paste0(glbOut$pfx, "dsk_knitr.RData"))
#cmpCols <- names(glbObsAll)[!grepl("\\.Final\\.", names(glbObsAll))]; all.equal(savObsAll[, cmpCols], glbObsAll[, cmpCols]); all.equal(savObsAll[, "H.P.http"], glbObsAll[, "H.P.http"]);
# Replay the analytics Petri net after registering "data.training.all.prediction"
# and "model.final" as newly available objects (the assignment to
# glb_analytics_avl_objs happens inline, inside the replay.trans argument).
# NOTE(review): flip_coord presumably flips the plot coordinates — see
# mypetrinet.R for the actual semantics.
replay.petrisim(pn = glb_analytics_pn,
replay.trans = (glb_analytics_avl_objs <- c(glb_analytics_avl_objs,
"data.training.all.prediction","model.final")), flip_coord = TRUE)
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
## 2.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction data.new.prediction firing: model.selected
## 3.0000 3 0 2 1 0
## 3.0000 multiple enabled transitions: model.final data.training.all.prediction data.new.prediction firing: data.training.all.prediction
## 4.0000 5 0 1 1 1
## 4.0000 multiple enabled transitions: model.final data.training.all.prediction data.new.prediction firing: model.final
## 5.0000 4 0 0 2 1
# Begin the next major pipeline chunk: predicting on the new (test) data.
glb_chunks_df <- myadd_chunk(glb_chunks_df, "predict.data.new", major.inc = TRUE)
## label step_major step_minor label_minor bgn end
## 21 fit.data.training 9 1 1 330.427 337.217
## 22 predict.data.new 10 0 0 337.218 NA
## elapsed
## 21 6.791
## 22 NA
10.0: predict data new
## Warning in glb_get_predictions(obs_df, mdl_id = glb_fin_mdl_id, rsp_var =
## glb_rsp_var, : Using default probability threshold: 0.2
## Warning in glb_get_predictions(obs_df, mdl_id = glb_fin_mdl_id, rsp_var =
## glb_rsp_var, : Using default probability threshold: 0.2
## Warning in glb_analytics_diag_plots(obs_df = glbObsNew, mdl_id =
## glb_fin_mdl_id, : Limiting important feature scatter plots to 5 out of 55
## Warning: Removed 10000 rows containing missing values (geom_point).
## Warning: Removed 10000 rows containing missing values (geom_point).
## Warning: Removed 10000 rows containing missing values (geom_point).
## Warning: Removed 10000 rows containing missing values (geom_point).
## Warning: Removed 10000 rows containing missing values (geom_point).
## Warning: Removed 10000 rows containing missing values (geom_point).
## Warning: Removed 10000 rows containing missing values (geom_point).
## Warning: Removed 10000 rows containing missing values (geom_point).
## Warning: Removed 10000 rows containing missing values (geom_point).
## Warning: Removed 10000 rows containing missing values (geom_point).
## NULL
## Loading required package: tidyr
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:Matrix':
##
## expand
## [1] "OOBobs outdoor.fctr.All.X..rcv.glmnet Y: min < min of Train range: 21"
## business_id outdoor.fctr.All.X..rcv.glmnet .pos CorBG.mean CorGR.mad
## 3 100 Y 3 0.9534157 0.06443954
## 411 1664 Y 411 0.9450957 0.11783638
## 438 1733 Y 438 0.9182391 0.07733427
## 1153 3080 Y 1153 0.6990869 0.06605984
## 1340 3431 Y 1340 0.9457793 0.03043020
## 1778 630 Y 1778 0.9754135 0.03730625
## CorGR.mean CorRB.mean CosSmlBG.mad CosSmlBG.mean CosSmlGR.mad
## 3 0.8691129 0.9373822 0.009538704 0.9832855 0.02161117
## 411 0.7556866 0.8797031 0.004206153 0.9872615 0.02099284
## 438 0.7147670 0.8840360 0.017715222 0.9627788 0.01300322
## 1153 0.5722977 0.9204220 0.016592655 0.8793939 0.02781767
## 1340 0.8789159 0.9528149 0.007247153 0.9768245 0.02697337
## 1778 0.9131355 0.9649619 0.003181644 0.9919500 0.01541220
## CosSmlRB.mad lumB.mean.mad lumG.mad.mad lumG.mean.mad lumR.mean.mad
## 3 0.02161117 0.21857605 0.12930016 0.24467747 0.19252560
## 411 0.02099284 0.08143041 0.06034008 0.08514664 0.07934835
## 438 0.01300322 0.02497502 0.04310005 0.03443812 0.11640653
## 1153 0.02781767 0.02991750 0.11206014 0.08588503 0.00382355
## 1340 0.02697337 0.16295491 0.14654018 0.14328800 0.16223971
## 1778 0.01541220 0.02921797 0.11206014 0.04538060 0.03954695
## business_id outdoor.fctr.All.X..rcv.glmnet .pos CorBG.mean
## 411 1664 Y 411 0.9450957
## 1153 3080 Y 1153 0.6990869
## 423 1697 Y 423 0.8936020
## 638 2108 Y 638 0.9161732
## 896 2601 Y 896 0.9381848
## 1027 2846 Y 1027 0.9528002
## CorGR.mad CorGR.mean CorRB.mean CosSmlBG.mad CosSmlBG.mean
## 411 0.117836378 0.7556866 0.8797031 0.004206153 0.9872615
## 1153 0.066059845 0.5722977 0.9204220 0.016592655 0.8793939
## 423 0.202781928 0.6031397 0.7945435 0.013975870 0.9670249
## 638 0.176609701 0.6305973 0.7513301 0.012707376 0.9704923
## 896 0.091544398 0.8586091 0.9482939 0.020338246 0.9710365
## 1027 0.002569433 0.8005106 0.8557567 0.005760986 0.9877476
## CosSmlGR.mad CosSmlRB.mad lumB.mean.mad lumG.mad.mad lumG.mean.mad
## 411 0.020992836 0.020992836 0.08143041 0.06034008 0.08514664
## 1153 0.027817673 0.027817673 0.02991750 0.11206014 0.08588503
## 423 0.035023993 0.035023993 0.07961865 0.17240022 0.10075200
## 638 0.053199674 0.053199674 0.09007279 0.05172006 0.03840424
## 896 0.031825158 0.031825158 0.06604578 0.08620011 0.13468772
## 1027 0.005660636 0.005660636 0.02824066 0.03017004 0.01148923
## lumR.mean.mad
## 411 0.07934835
## 1153 0.00382355
## 423 0.13092209
## 638 0.05551682
## 896 0.01255001
## 1027 0.02787697
## business_id outdoor.fctr.All.X..rcv.glmnet .pos CorBG.mean CorGR.mad
## 1851 739 Y 1851 0.9700280 0.04316501
## 1 1000 Y 1 0.9651591 0.05089842
## 1916 848 Y 1916 0.9470680 0.03041371
## 801 2414 Y 801 0.9655465 0.03137102
## 1970 940 Y 1970 0.8538433 0.13363764
## 1992 985 Y 1992 0.9138121 0.07401777
## CorGR.mean CorRB.mean CosSmlBG.mad CosSmlBG.mean CosSmlGR.mad
## 1851 0.8658395 0.9112972 0.003906950 0.9919660 0.020070554
## 1 0.8937347 0.9517167 0.007596351 0.9889509 0.020210611
## 1916 0.8376879 0.8982008 0.020953242 0.9776730 0.011051731
## 801 0.9159928 0.9727625 0.005539830 0.9905542 0.010457244
## 1970 0.7216448 0.6664379 0.021009258 0.9466775 0.049160284
## 1992 0.8113209 0.9467382 0.011789779 0.9695080 0.009706923
## CosSmlRB.mad lumB.mean.mad lumG.mad.mad lumG.mean.mad lumR.mean.mad
## 1851 0.020070554 0.16061318 0.09913012 0.11123591 0.19617118
## 1 0.020210611 0.11675258 0.10344013 0.15449772 0.13040317
## 1916 0.011051731 0.03855610 0.01724002 0.04197542 0.06221218
## 801 0.010457244 0.08225465 0.08189010 0.11472078 0.06062840
## 1970 0.049160284 0.13809652 0.10344013 0.04921795 0.07216195
## 1992 0.009706923 0.05919116 0.07758010 0.10998848 0.12540762
## id cor.y exclude.as.feat cor.y.abs
## .pos .pos 0.027497300 FALSE 0.027497300
## CorBG.mean CorBG.mean 0.016157691 FALSE 0.016157691
## CorGR.mad CorGR.mad 0.030253289 FALSE 0.030253289
## CorGR.mean CorGR.mean 0.004925319 FALSE 0.004925319
## CorRB.mean CorRB.mean -0.009617034 FALSE 0.009617034
## CosSmlBG.mad CosSmlBG.mad -0.046206836 FALSE 0.046206836
## CosSmlBG.mean CosSmlBG.mean 0.030023372 FALSE 0.030023372
## CosSmlGR.mad CosSmlGR.mad -0.003587615 FALSE 0.003587615
## CosSmlRB.mad CosSmlRB.mad -0.003587615 FALSE 0.003587615
## lumB.mean.mad lumB.mean.mad 0.049622224 FALSE 0.049622224
## lumG.mad.mad lumG.mad.mad 0.069818982 FALSE 0.069818982
## lumG.mean.mad lumG.mean.mad 0.051616272 FALSE 0.051616272
## lumR.mean.mad lumR.mean.mad 0.015642413 FALSE 0.015642413
## cor.high.X freqRatio percentUnique zeroVar nzv
## .pos <NA> 1.000000 100.00 FALSE FALSE
## CorBG.mean CosSmlBG.mean 1.000000 99.90 FALSE FALSE
## CorGR.mad <NA> 1.000000 100.00 FALSE FALSE
## CorGR.mean <NA> 2.000000 99.95 FALSE FALSE
## CorRB.mean CorRB.mad 1.000000 100.00 FALSE FALSE
## CosSmlBG.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlBG.mean CosSmlBG.mad 1.000000 99.70 FALSE FALSE
## CosSmlGR.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlRB.mad <NA> 1.000000 100.00 FALSE FALSE
## lumB.mean.mad lumG.mean.mad 1.000000 100.00 FALSE FALSE
## lumG.mad.mad <NA> 1.049261 2.40 FALSE FALSE
## lumG.mean.mad <NA> 2.000000 99.95 FALSE FALSE
## lumR.mean.mad <NA> 2.000000 99.95 FALSE FALSE
## is.cor.y.abs.low interaction.feat shapiro.test.p.value
## .pos FALSE NA 2.145811e-24
## CorBG.mean FALSE NA 8.214498e-35
## CorGR.mad FALSE NA 7.392587e-28
## CorGR.mean TRUE NA 1.178044e-23
## CorRB.mean FALSE NA 1.946414e-36
## CosSmlBG.mad FALSE NA 7.694894e-38
## CosSmlBG.mean FALSE NA 1.504448e-37
## CosSmlGR.mad TRUE NA 1.304267e-33
## CosSmlRB.mad TRUE NA 1.304267e-33
## lumB.mean.mad FALSE NA 1.492315e-16
## lumG.mad.mad FALSE NA 1.047894e-17
## lumG.mean.mad FALSE NA 6.299586e-18
## lumR.mean.mad FALSE NA 3.610555e-14
## rsp_var_raw id_var rsp_var max min
## .pos FALSE NA NA 1.200000e+04 1.0000000
## CorBG.mean FALSE NA NA 9.873230e-01 0.3973767
## CorGR.mad FALSE NA NA 4.591960e-01 0.0000000
## CorGR.mean FALSE NA NA 9.563650e-01 0.0972070
## CorRB.mean FALSE NA NA 9.814793e-01 0.6664379
## CosSmlBG.mad FALSE NA NA 8.290404e-02 0.0000000
## CosSmlBG.mean FALSE NA NA 9.952656e-01 0.7560239
## CosSmlGR.mad FALSE NA NA 1.971821e-01 0.0000000
## CosSmlRB.mad FALSE NA NA 1.971821e-01 0.0000000
## lumB.mean.mad FALSE NA NA 3.437349e-01 0.0000000
## lumG.mad.mad FALSE NA NA 3.017004e-01 0.0000000
## lumG.mean.mad FALSE NA NA 3.773683e-01 0.0000000
## lumR.mean.mad FALSE NA NA 3.627523e-01 0.0000000
## max.outdoor.fctr.N max.outdoor.fctr.Y min.outdoor.fctr.N
## .pos 2.000000e+03 1.996000e+03 4.000000000
## CorBG.mean 9.746994e-01 9.752289e-01 0.798558813
## CorGR.mad 2.598524e-01 2.760160e-01 0.032594657
## CorGR.mean 9.143933e-01 9.168114e-01 0.552684724
## CorRB.mean 9.720959e-01 9.703118e-01 0.747679244
## CosSmlBG.mad 5.922465e-02 5.333009e-02 0.003496574
## CosSmlBG.mean 9.909058e-01 9.890039e-01 0.892165857
## CosSmlGR.mad 1.214126e-01 1.021883e-01 0.009921125
## CosSmlRB.mad 1.214126e-01 1.021883e-01 0.009921125
## lumB.mean.mad 2.197032e-01 3.251330e-01 0.042453945
## lumG.mad.mad 2.068803e-01 3.017004e-01 0.025860032
## lumG.mean.mad 2.696606e-01 3.267359e-01 0.022242521
## lumR.mean.mad 2.805100e-01 2.855381e-01 0.031401570
## min.outdoor.fctr.Y max.outdoor.fctr.All.X..rcv.glmnet.N
## .pos 6.000000000 1.997000e+03
## CorBG.mean 0.797634711 9.638528e-01
## CorGR.mad 0.034213005 2.206953e-01
## CorGR.mean 0.603879667 8.924409e-01
## CorRB.mean 0.773861657 9.572325e-01
## CosSmlBG.mad 0.004274444 7.349383e-02
## CosSmlBG.mean 0.899024033 9.900264e-01
## CosSmlGR.mad 0.010443564 1.410021e-01
## CosSmlRB.mad 0.010443564 1.410021e-01
## lumB.mean.mad 0.029486380 2.000383e-01
## lumG.mad.mad 0.034480043 1.680902e-01
## lumG.mean.mad 0.024856113 1.858523e-01
## lumR.mean.mad 0.021198356 2.472881e-01
## max.outdoor.fctr.All.X..rcv.glmnet.Y
## .pos 1.998000e+03
## CorBG.mean 9.763031e-01
## CorGR.mad 2.998611e-01
## CorGR.mean 9.159928e-01
## CorRB.mean 9.727625e-01
## CosSmlBG.mad 5.448611e-02
## CosSmlBG.mean 9.919660e-01
## CosSmlGR.mad 1.318410e-01
## CosSmlRB.mad 1.318410e-01
## lumB.mean.mad 2.318285e-01
## lumG.mad.mad 2.284303e-01
## lumG.mean.mad 2.446775e-01
## lumR.mean.mad 2.396022e-01
## min.outdoor.fctr.All.X..rcv.glmnet.N
## .pos 5.000000000
## CorBG.mean 0.825461596
## CorGR.mad 0.032701090
## CorGR.mean 0.668245044
## CorRB.mean 0.844342971
## CosSmlBG.mad 0.005967801
## CosSmlBG.mean 0.909692822
## CosSmlGR.mad 0.015439896
## CosSmlRB.mad 0.015439896
## lumB.mean.mad 0.053421167
## lumG.mad.mad 0.025860032
## lumG.mean.mad 0.038196141
## lumR.mean.mad 0.035560160
## min.outdoor.fctr.All.X..rcv.glmnet.Y
## .pos 1.000000000
## CorBG.mean 0.699086917
## CorGR.mad 0.002569433
## CorGR.mean 0.572297724
## CorRB.mean 0.666437858
## CosSmlBG.mad 0.002835269
## CosSmlBG.mean 0.879393898
## CosSmlGR.mad 0.005660636
## CosSmlRB.mad 0.005660636
## lumB.mean.mad 0.024975023
## lumG.mad.mad 0.017240022
## lumG.mean.mad 0.011489233
## lumR.mean.mad 0.003823550
## max.outdoor.fctr.Final..rcv.glmnet.Y
## .pos 1.200000e+04
## CorBG.mean 9.873230e-01
## CorGR.mad 4.591960e-01
## CorGR.mean 9.563650e-01
## CorRB.mean 9.814793e-01
## CosSmlBG.mad 8.290404e-02
## CosSmlBG.mean 9.952656e-01
## CosSmlGR.mad 1.971821e-01
## CosSmlRB.mad 1.971821e-01
## lumB.mean.mad 3.437349e-01
## lumG.mad.mad 2.758403e-01
## lumG.mean.mad 3.773683e-01
## lumR.mean.mad 3.627523e-01
## min.outdoor.fctr.Final..rcv.glmnet.Y
## .pos 2001.0000000
## CorBG.mean 0.3973767
## CorGR.mad 0.0000000
## CorGR.mean 0.0972070
## CorRB.mean 0.6930769
## CosSmlBG.mad 0.0000000
## CosSmlBG.mean 0.7560239
## CosSmlGR.mad 0.0000000
## CosSmlRB.mad 0.0000000
## lumB.mean.mad 0.0000000
## lumG.mad.mad 0.0000000
## lumG.mean.mad 0.0000000
## lumR.mean.mad 0.0000000
## [1] "OOBobs outdoor.fctr.All.X..rcv.glmnet Y: max > max of Train range: 28"
## business_id outdoor.fctr.All.X..rcv.glmnet .pos CorBG.mean CorGR.mad
## 255 1407 Y 255 0.9549695 0.06432043
## 441 1737 Y 441 0.8521702 0.28657031
## 711 225 Y 711 0.8865274 0.16763950
## 1778 630 Y 1778 0.9754135 0.03730625
## 125 1201 Y 125 0.9562179 0.07756351
## 561 195 Y 561 0.9197286 0.15091844
## CorRB.mad CorRB.mean CosSmlBG.mad CosSmlBG.mean CosSmlGR.mad
## 255 0.01729136 0.9525404 0.017703045 0.9732530 0.03180646
## 441 0.08002369 0.8833447 0.049955200 0.9293498 0.12621140
## 711 0.05036498 0.9233587 0.051842624 0.9292719 0.11498611
## 1778 0.01605121 0.9649619 0.003181644 0.9919500 0.01541220
## 125 0.05295903 0.9332332 0.011032926 0.9867521 0.02273428
## 561 0.06158377 0.8932733 0.012919425 0.9778230 0.03162892
## CosSmlGR.mean CosSmlRB.mad CosSmlRB.mean lumB.mad.mad lumB.mad.mean
## 255 0.9372639 0.03180646 0.9372639 0.19826025 0.2657052
## 441 0.8495010 0.12621140 0.8495010 0.08620011 0.2145063
## 711 0.8578498 0.11498611 0.8578498 0.11206014 0.2091511
## 1778 0.9748654 0.01541220 0.9748654 0.10344013 0.1639778
## 125 0.9625913 0.02273428 0.9625913 0.09482012 0.2161606
## 561 0.9364955 0.03162892 0.9364955 0.06034008 0.2344504
## lumR.mad.mean lumR.mean.mean nImgs nImgs.log1p nImgs.root2 resXY.mean
## 255 0.3137685 0.4366273 30 3.433987 5.477226 182959.1
## 441 0.2320642 0.4372637 151 5.023881 12.288206 173319.3
## 711 0.2446644 0.4288620 37 3.637586 6.082763 180743.2
## 1778 0.1663429 0.4942236 59 4.094345 7.681146 185042.4
## 125 0.1816912 0.6443992 28 3.367296 5.291503 187500.0
## 561 0.2132367 0.5588305 37 3.637586 6.082763 208702.7
## resXY.mean.log1p resXY.mean.root2
## 255 12.11702 427.7371
## 441 12.06290 416.3164
## 711 12.10484 425.1391
## 1778 12.12835 430.1655
## 125 12.14154 433.0127
## 561 12.24867 456.8399
## business_id outdoor.fctr.All.X..rcv.glmnet .pos CorBG.mean CorGR.mad
## 711 225 Y 711 0.8865274 0.16763950
## 1777 629 Y 1777 0.9313882 0.05493422
## 60 1092 Y 60 0.9014258 0.07968118
## 682 2191 Y 682 0.9587073 0.09046716
## 897 2602 Y 897 0.9065179 0.09642553
## 1569 3846 Y 1569 0.9556651 0.09794243
## CorRB.mad CorRB.mean CosSmlBG.mad CosSmlBG.mean CosSmlGR.mad
## 711 0.05036498 0.9233587 0.051842624 0.9292719 0.11498611
## 1777 0.05433047 0.8922538 0.013851576 0.9741940 0.02666473
## 60 0.05195761 0.9059727 0.014320594 0.9691783 0.02554787
## 682 0.05733392 0.8945790 0.006211368 0.9894433 0.02292900
## 897 0.04388374 0.9211759 0.026366127 0.9590636 0.06670649
## 1569 0.04323826 0.9304064 0.005971016 0.9893194 0.01353066
## CosSmlGR.mean CosSmlRB.mad CosSmlRB.mean lumB.mad.mad lumB.mad.mean
## 711 0.8578498 0.11498611 0.8578498 0.11206014 0.2091511
## 1777 0.9326118 0.02666473 0.9326118 0.09051011 0.2610539
## 60 0.9353725 0.02554787 0.9353725 0.06465008 0.2267506
## 682 0.9646381 0.02292900 0.9646381 0.09913012 0.1711531
## 897 0.9105043 0.06670649 0.9105043 0.19395024 0.2186754
## 1569 0.9697832 0.01353066 0.9697832 0.09482012 0.1645898
## lumR.mad.mean lumR.mean.mean nImgs nImgs.log1p nImgs.root2 resXY.mean
## 711 0.2446644 0.4288620 37 3.637586 6.082763 180743.2
## 1777 0.2049476 0.6691801 20 3.044522 4.472136 207438.0
## 60 0.2440314 0.5163101 36 3.610918 6.000000 218277.8
## 682 0.1682460 0.4768067 16 2.833213 4.000000 181500.0
## 897 0.2529141 0.3746519 18 2.944439 4.242641 195777.8
## 1569 0.1473628 0.5255165 81 4.406719 9.000000 179981.5
## resXY.mean.log1p resXY.mean.root2
## 711 12.10484 425.1391
## 1777 12.24259 455.4536
## 60 12.29353 467.2021
## 682 12.10902 426.0282
## 897 12.18474 442.4678
## 1569 12.10061 424.2422
## business_id outdoor.fctr.All.X..rcv.glmnet .pos CorBG.mean CorGR.mad
## 1519 3761 Y 1519 0.9311919 0.09725804
## 260 14 Y 260 0.9204320 0.29986110
## 801 2414 Y 801 0.9655465 0.03137102
## 1708 487 Y 1708 0.9132529 0.13746009
## 1849 736 Y 1849 0.9763031 0.07289923
## 1970 940 Y 1970 0.8538433 0.13363764
## CorRB.mad CorRB.mean CosSmlBG.mad CosSmlBG.mean CosSmlGR.mad
## 1519 0.03684362 0.9264053 0.023854455 0.9666211 0.06700260
## 260 0.21274608 0.7775400 0.016933538 0.9711220 0.09231549
## 801 0.01152580 0.9727625 0.005539830 0.9905542 0.01045724
## 1708 0.05751736 0.9031499 0.016066607 0.9706049 0.03889219
## 1849 0.07254887 0.9064160 0.006141621 0.9856210 0.02097063
## 1970 0.28671209 0.6664379 0.021009258 0.9466775 0.04916028
## CosSmlGR.mean CosSmlRB.mad CosSmlRB.mean lumB.mad.mad lumB.mad.mean
## 1519 0.9082908 0.06700260 0.9082908 0.19826025 0.2589774
## 260 0.8967539 0.09231549 0.8967539 0.07758010 0.2339255
## 801 0.9774375 0.01045724 0.9774375 0.07327009 0.2566518
## 1708 0.9261288 0.03889219 0.9261288 0.10344013 0.2447985
## 1849 0.9430025 0.02097063 0.9430025 0.20688026 0.2558212
## 1970 0.8985481 0.04916028 0.8985481 0.03448004 0.2023313
## lumR.mad.mean lumR.mean.mean nImgs nImgs.log1p nImgs.root2 resXY.mean
## 1519 0.2622998 0.4715289 35 3.583519 5.916080 185957.1
## 260 0.1992882 0.5579907 47 3.871201 6.855655 181212.8
## 801 0.2354718 0.4871434 14 2.708050 3.741657 178369.4
## 1708 0.2368643 0.4837614 1954 7.578145 44.204072 185510.8
## 1849 0.2455347 0.4154662 13 2.639057 3.605551 192423.1
## 1970 0.1551207 0.6617843 25 3.258097 5.000000 193145.0
## resXY.mean.log1p resXY.mean.root2
## 1519 12.13328 431.2275
## 260 12.10743 425.6909
## 801 12.09162 422.3380
## 1708 12.13087 430.7096
## 1849 12.16746 438.6605
## 1970 12.17120 439.4826
## id cor.y exclude.as.feat cor.y.abs
## .pos .pos 0.027497300 FALSE 0.027497300
## CorBG.mean CorBG.mean 0.016157691 FALSE 0.016157691
## CorGR.mad CorGR.mad 0.030253289 FALSE 0.030253289
## CorRB.mad CorRB.mad 0.038198961 FALSE 0.038198961
## CorRB.mean CorRB.mean -0.009617034 FALSE 0.009617034
## CosSmlBG.mad CosSmlBG.mad -0.046206836 FALSE 0.046206836
## CosSmlBG.mean CosSmlBG.mean 0.030023372 FALSE 0.030023372
## CosSmlGR.mad CosSmlGR.mad -0.003587615 FALSE 0.003587615
## CosSmlGR.mean CosSmlGR.mean 0.021022718 FALSE 0.021022718
## CosSmlRB.mad CosSmlRB.mad -0.003587615 FALSE 0.003587615
## CosSmlRB.mean CosSmlRB.mean 0.021022718 FALSE 0.021022718
## lumB.mad.mad lumB.mad.mad 0.038630626 FALSE 0.038630626
## lumB.mad.mean lumB.mad.mean 0.019323904 FALSE 0.019323904
## lumR.mad.mean lumR.mad.mean 0.036065638 FALSE 0.036065638
## lumR.mean.mean lumR.mean.mean -0.115393376 FALSE 0.115393376
## nImgs nImgs -0.014963676 FALSE 0.014963676
## nImgs.log1p nImgs.log1p 0.047250893 FALSE 0.047250893
## nImgs.root2 nImgs.root2 0.014028124 FALSE 0.014028124
## resXY.mean resXY.mean -0.009002880 FALSE 0.009002880
## resXY.mean.log1p resXY.mean.log1p -0.004867571 FALSE 0.004867571
## resXY.mean.root2 resXY.mean.root2 -0.007039955 FALSE 0.007039955
## cor.high.X freqRatio percentUnique zeroVar nzv
## .pos <NA> 1.000000 100.00 FALSE FALSE
## CorBG.mean CosSmlBG.mean 1.000000 99.90 FALSE FALSE
## CorGR.mad <NA> 1.000000 100.00 FALSE FALSE
## CorRB.mad <NA> 1.000000 100.00 FALSE FALSE
## CorRB.mean CorRB.mad 1.000000 100.00 FALSE FALSE
## CosSmlBG.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlBG.mean CosSmlBG.mad 1.000000 99.70 FALSE FALSE
## CosSmlGR.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlGR.mean CosSmlBG.mean 1.000000 99.70 FALSE FALSE
## CosSmlRB.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlRB.mean CosSmlGR.mean 1.000000 99.70 FALSE FALSE
## lumB.mad.mad <NA> 1.022624 2.75 FALSE FALSE
## lumB.mad.mean <NA> 1.200000 92.35 FALSE FALSE
## lumR.mad.mean <NA> 1.142857 93.95 FALSE FALSE
## lumR.mean.mean <NA> 1.000000 100.00 FALSE FALSE
## nImgs <NA> 1.033333 19.10 FALSE FALSE
## nImgs.log1p nImgs.cut.fctr 1.033333 19.10 FALSE FALSE
## nImgs.root2 nImgs.log1p 1.033333 19.10 FALSE FALSE
## resXY.mean <NA> 6.000000 98.55 FALSE FALSE
## resXY.mean.log1p <NA> 4.000000 90.80 FALSE FALSE
## resXY.mean.root2 <NA> 6.000000 98.20 FALSE FALSE
## is.cor.y.abs.low interaction.feat shapiro.test.p.value
## .pos FALSE NA 2.145811e-24
## CorBG.mean FALSE NA 8.214498e-35
## CorGR.mad FALSE NA 7.392587e-28
## CorRB.mad FALSE NA 2.592335e-42
## CorRB.mean FALSE NA 1.946414e-36
## CosSmlBG.mad FALSE NA 7.694894e-38
## CosSmlBG.mean FALSE NA 1.504448e-37
## CosSmlGR.mad TRUE NA 1.304267e-33
## CosSmlGR.mean FALSE NA 9.960727e-29
## CosSmlRB.mad TRUE NA 1.304267e-33
## CosSmlRB.mean FALSE NA 9.960727e-29
## lumB.mad.mad FALSE NA 5.263514e-19
## lumB.mad.mean FALSE NA 2.584492e-12
## lumR.mad.mean FALSE NA 5.440725e-06
## lumR.mean.mean FALSE NA 4.578209e-07
## nImgs FALSE NA 1.364097e-61
## nImgs.log1p FALSE NA 1.234907e-13
## nImgs.root2 FALSE NA 4.118632e-46
## resXY.mean FALSE NA 2.964553e-36
## resXY.mean.log1p TRUE NA 6.980019e-43
## resXY.mean.root2 TRUE NA 1.780045e-39
## rsp_var_raw id_var rsp_var max min
## .pos FALSE NA NA 1.200000e+04 1.000000e+00
## CorBG.mean FALSE NA NA 9.873230e-01 3.973767e-01
## CorGR.mad FALSE NA NA 4.591960e-01 0.000000e+00
## CorRB.mad FALSE NA NA 2.867121e-01 0.000000e+00
## CorRB.mean FALSE NA NA 9.814793e-01 6.664379e-01
## CosSmlBG.mad FALSE NA NA 8.290404e-02 0.000000e+00
## CosSmlBG.mean FALSE NA NA 9.952656e-01 7.560239e-01
## CosSmlGR.mad FALSE NA NA 1.971821e-01 0.000000e+00
## CosSmlGR.mean FALSE NA NA 9.832886e-01 7.321306e-01
## CosSmlRB.mad FALSE NA NA 1.971821e-01 0.000000e+00
## CosSmlRB.mean FALSE NA NA 9.832886e-01 7.321306e-01
## lumB.mad.mad FALSE NA NA 2.672203e-01 0.000000e+00
## lumB.mad.mean FALSE NA NA 3.621365e-01 1.046541e-01
## lumR.mad.mean FALSE NA NA 3.837318e-01 1.104682e-01
## lumR.mean.mean FALSE NA NA 6.986750e-01 2.684265e-01
## nImgs FALSE NA NA 2.974000e+03 1.000000e+00
## nImgs.log1p FALSE NA NA 7.997999e+00 6.931472e-01
## nImgs.root2 FALSE NA NA 5.453439e+01 1.000000e+00
## resXY.mean FALSE NA NA 2.500000e+05 8.762615e+04
## resXY.mean.log1p FALSE NA NA 1.242922e+01 1.138085e+01
## resXY.mean.root2 FALSE NA NA 5.000000e+02 2.960172e+02
## max.outdoor.fctr.N max.outdoor.fctr.Y min.outdoor.fctr.N
## .pos 2.000000e+03 1.996000e+03 4.000000e+00
## CorBG.mean 9.746994e-01 9.752289e-01 7.985588e-01
## CorGR.mad 2.598524e-01 2.760160e-01 3.259466e-02
## CorRB.mad 1.707495e-01 1.856037e-01 7.217628e-03
## CorRB.mean 9.720959e-01 9.703118e-01 7.476792e-01
## CosSmlBG.mad 5.922465e-02 5.333009e-02 3.496574e-03
## CosSmlBG.mean 9.909058e-01 9.890039e-01 8.921659e-01
## CosSmlGR.mad 1.214126e-01 1.021883e-01 9.921125e-03
## CosSmlGR.mean 9.727175e-01 9.704089e-01 8.492029e-01
## CosSmlRB.mad 1.214126e-01 1.021883e-01 9.921125e-03
## CosSmlRB.mean 9.727175e-01 9.704089e-01 8.492029e-01
## lumB.mad.mad 2.672203e-01 1.896402e-01 8.620011e-03
## lumB.mad.mean 3.621365e-01 3.130679e-01 1.094111e-01
## lumR.mad.mean 3.232053e-01 3.166458e-01 1.466861e-01
## lumR.mean.mean 6.328933e-01 6.440937e-01 3.298595e-01
## nImgs 2.974000e+03 1.870000e+03 5.000000e+00
## nImgs.log1p 7.997999e+00 7.534228e+00 1.791759e+00
## nImgs.root2 5.453439e+01 4.324350e+01 2.236068e+00
## resXY.mean 2.175740e+05 2.037333e+05 8.762615e+04
## resXY.mean.log1p 1.229030e+01 1.222457e+01 1.138085e+01
## resXY.mean.root2 4.664483e+02 4.513683e+02 2.960172e+02
## min.outdoor.fctr.Y max.outdoor.fctr.All.X..rcv.glmnet.N
## .pos 6.000000e+00 1.997000e+03
## CorBG.mean 7.976347e-01 9.638528e-01
## CorGR.mad 3.421300e-02 2.206953e-01
## CorRB.mad 8.978361e-03 8.402239e-02
## CorRB.mean 7.738617e-01 9.572325e-01
## CosSmlBG.mad 4.274444e-03 7.349383e-02
## CosSmlBG.mean 8.990240e-01 9.900264e-01
## CosSmlGR.mad 1.044356e-02 1.410021e-01
## CosSmlGR.mean 7.913946e-01 9.734692e-01
## CosSmlRB.mad 1.044356e-02 1.410021e-01
## CosSmlRB.mean 7.913946e-01 9.734692e-01
## lumB.mad.mad 2.586003e-02 1.896402e-01
## lumB.mad.mean 1.217545e-01 3.021552e-01
## lumR.mad.mean 1.411135e-01 2.889616e-01
## lumR.mean.mean 2.699415e-01 6.527598e-01
## nImgs 2.000000e+00 2.020000e+02
## nImgs.log1p 1.098612e+00 5.313206e+00
## nImgs.root2 1.414214e+00 1.421267e+01
## resXY.mean 1.137250e+05 2.010079e+05
## resXY.mean.log1p 1.164155e+01 1.221110e+01
## resXY.mean.root2 3.372314e+02 4.483391e+02
## max.outdoor.fctr.All.X..rcv.glmnet.Y
## .pos 1.998000e+03
## CorBG.mean 9.763031e-01
## CorGR.mad 2.998611e-01
## CorRB.mad 2.867121e-01
## CorRB.mean 9.727625e-01
## CosSmlBG.mad 5.448611e-02
## CosSmlBG.mean 9.919660e-01
## CosSmlGR.mad 1.318410e-01
## CosSmlGR.mean 9.774375e-01
## CosSmlRB.mad 1.318410e-01
## CosSmlRB.mean 9.774375e-01
## lumB.mad.mad 2.068803e-01
## lumB.mad.mean 3.338965e-01
## lumR.mad.mean 3.662894e-01
## lumR.mean.mean 6.691801e-01
## nImgs 2.239000e+03
## nImgs.log1p 7.714231e+00
## nImgs.root2 4.731807e+01
## resXY.mean 2.182778e+05
## resXY.mean.log1p 1.229353e+01
## resXY.mean.root2 4.672021e+02
## min.outdoor.fctr.All.X..rcv.glmnet.N
## .pos 5.000000e+00
## CorBG.mean 8.254616e-01
## CorGR.mad 3.270109e-02
## CorRB.mad 1.112447e-02
## CorRB.mean 8.443430e-01
## CosSmlBG.mad 5.967801e-03
## CosSmlBG.mean 9.096928e-01
## CosSmlGR.mad 1.543990e-02
## CosSmlGR.mean 7.730400e-01
## CosSmlRB.mad 1.543990e-02
## CosSmlRB.mean 7.730400e-01
## lumB.mad.mad 2.586003e-02
## lumB.mad.mean 1.220965e-01
## lumR.mad.mean 1.437851e-01
## lumR.mean.mean 4.160745e-01
## nImgs 8.000000e+00
## nImgs.log1p 2.197225e+00
## nImgs.root2 2.828427e+00
## resXY.mean 1.445588e+05
## resXY.mean.log1p 1.188145e+01
## resXY.mean.root2 3.802089e+02
## min.outdoor.fctr.All.X..rcv.glmnet.Y
## .pos 1.000000e+00
## CorBG.mean 6.990869e-01
## CorGR.mad 2.569433e-03
## CorRB.mad 9.440068e-03
## CorRB.mean 6.664379e-01
## CosSmlBG.mad 2.835269e-03
## CosSmlBG.mean 8.793939e-01
## CosSmlGR.mad 5.660636e-03
## CosSmlGR.mean 8.441096e-01
## CosSmlRB.mad 5.660636e-03
## CosSmlRB.mean 8.441096e-01
## lumB.mad.mad 2.586003e-02
## lumB.mad.mean 1.367933e-01
## lumR.mad.mean 1.473628e-01
## lumR.mean.mean 2.979887e-01
## nImgs 2.000000e+00
## nImgs.log1p 1.098612e+00
## nImgs.root2 1.414214e+00
## resXY.mean 1.453448e+05
## resXY.mean.log1p 1.188687e+01
## resXY.mean.root2 3.812411e+02
## max.outdoor.fctr.Final..rcv.glmnet.Y
## .pos 1.200000e+04
## CorBG.mean 9.873230e-01
## CorGR.mad 4.591960e-01
## CorRB.mad 2.375609e-01
## CorRB.mean 9.814793e-01
## CosSmlBG.mad 8.290404e-02
## CosSmlBG.mean 9.952656e-01
## CosSmlGR.mad 1.971821e-01
## CosSmlGR.mean 9.832886e-01
## CosSmlRB.mad 1.971821e-01
## CosSmlRB.mean 9.832886e-01
## lumB.mad.mad 2.629103e-01
## lumB.mad.mean 3.461636e-01
## lumR.mad.mean 3.837318e-01
## lumR.mean.mean 6.986750e-01
## nImgs 2.825000e+03
## nImgs.log1p 7.946618e+00
## nImgs.root2 5.315073e+01
## resXY.mean 2.500000e+05
## resXY.mean.log1p 1.242922e+01
## resXY.mean.root2 5.000000e+02
## min.outdoor.fctr.Final..rcv.glmnet.Y
## .pos 2.001000e+03
## CorBG.mean 3.973767e-01
## CorGR.mad 0.000000e+00
## CorRB.mad 0.000000e+00
## CorRB.mean 6.930769e-01
## CosSmlBG.mad 0.000000e+00
## CosSmlBG.mean 7.560239e-01
## CosSmlGR.mad 0.000000e+00
## CosSmlGR.mean 7.321306e-01
## CosSmlRB.mad 0.000000e+00
## CosSmlRB.mean 7.321306e-01
## lumB.mad.mad 0.000000e+00
## lumB.mad.mean 1.046541e-01
## lumR.mad.mean 1.104682e-01
## lumR.mean.mean 2.684265e-01
## nImgs 1.000000e+00
## nImgs.log1p 6.931472e-01
## nImgs.root2 1.000000e+00
## resXY.mean 1.058460e+05
## resXY.mean.log1p 1.156975e+01
## resXY.mean.root2 3.253398e+02
## [1] "OOBobs outdoor.fctr.All.X..rcv.glmnet N: min < min of Train range: 6"
## business_id outdoor.fctr.All.X..rcv.glmnet CosSmlGR.mean
## 595 202 N 0.9713479
## 600 2032 N 0.8416038
## 662 2146 N 0.9022625
## 1285 333 N 0.8454122
## 309 1490 N 0.8806974
## 1842 723 N 0.7730400
## CosSmlRB.mean lumB.mean.mean lumG.mad.mean lumG.mean.mean
## 595 0.9713479 0.4501774 0.13836293 0.3429582
## 600 0.8416038 0.4082002 0.16211923 0.2451364
## 662 0.9022625 0.3691843 0.20024505 0.2628366
## 1285 0.8454122 0.4389812 0.18750529 0.2810690
## 309 0.8806974 0.3043346 0.09665971 0.1684021
## 1842 0.7730400 0.2351705 0.07599882 0.1453933
## lumR.mad.mean resY.mad.nexp
## 595 0.1437851 1.000000e+00
## 600 0.2706945 1.000000e+00
## 662 0.2742896 4.477805e-85
## 1285 0.2430301 1.095381e-20
## 309 0.2431028 7.239442e-44
## 1842 0.2151224 7.390989e-16
## id cor.y exclude.as.feat cor.y.abs
## CosSmlGR.mean CosSmlGR.mean 0.02102272 FALSE 0.02102272
## CosSmlRB.mean CosSmlRB.mean 0.02102272 FALSE 0.02102272
## lumB.mean.mean lumB.mean.mean -0.03252924 FALSE 0.03252924
## lumG.mad.mean lumG.mad.mean 0.07506242 FALSE 0.07506242
## lumG.mean.mean lumG.mean.mean 0.05249272 FALSE 0.05249272
## lumR.mad.mean lumR.mad.mean 0.03606564 FALSE 0.03606564
## resY.mad.nexp resY.mad.nexp 0.01219034 FALSE 0.01219034
## cor.high.X freqRatio percentUnique zeroVar
## CosSmlGR.mean CosSmlBG.mean 1.000000 99.70 FALSE
## CosSmlRB.mean CosSmlGR.mean 1.000000 99.70 FALSE
## lumB.mean.mean lumG.mean.mean 2.000000 99.95 FALSE
## lumG.mad.mean lumG.mad.mean.cut.fctr 1.000000 96.00 FALSE
## lumG.mean.mean <NA> 2.000000 99.95 FALSE
## lumR.mad.mean <NA> 1.142857 93.95 FALSE
## resY.mad.nexp <NA> 5.354497 9.05 FALSE
## nzv is.cor.y.abs.low interaction.feat
## CosSmlGR.mean FALSE FALSE NA
## CosSmlRB.mean FALSE FALSE NA
## lumB.mean.mean FALSE FALSE NA
## lumG.mad.mean FALSE FALSE NA
## lumG.mean.mean FALSE FALSE NA
## lumR.mad.mean FALSE FALSE NA
## resY.mad.nexp FALSE FALSE NA
## shapiro.test.p.value rsp_var_raw id_var rsp_var max
## CosSmlGR.mean 9.960727e-29 FALSE NA NA 0.9832886
## CosSmlRB.mean 9.960727e-29 FALSE NA NA 0.9832886
## lumB.mean.mean 2.980872e-09 FALSE NA NA 0.6404673
## lumG.mad.mean 1.165498e-07 FALSE NA NA 0.3604753
## lumG.mean.mean 1.000052e-02 FALSE NA NA 0.5993138
## lumR.mad.mean 5.440725e-06 FALSE NA NA 0.3837318
## resY.mad.nexp 1.839563e-53 FALSE NA NA 1.0000000
## min max.outdoor.fctr.N max.outdoor.fctr.Y
## CosSmlGR.mean 7.321306e-01 0.9727175 0.9704089
## CosSmlRB.mean 7.321306e-01 0.9727175 0.9704089
## lumB.mean.mean 1.977313e-01 0.5616800 0.5887858
## lumG.mad.mean 7.599882e-02 0.3496776 0.3357184
## lumG.mean.mean 1.422352e-01 0.5253529 0.5311267
## lumR.mad.mean 1.104682e-01 0.3232053 0.3166458
## resY.mad.nexp 8.904719e-122 1.0000000 1.0000000
## min.outdoor.fctr.N min.outdoor.fctr.Y
## CosSmlGR.mean 8.492029e-01 7.913946e-01
## CosSmlRB.mean 8.492029e-01 7.913946e-01
## lumB.mean.mean 2.701446e-01 2.302221e-01
## lumG.mad.mean 1.146528e-01 9.712997e-02
## lumG.mean.mean 1.817476e-01 1.923653e-01
## lumR.mad.mean 1.466861e-01 1.411135e-01
## resY.mad.nexp 3.268701e-81 3.268701e-81
## max.outdoor.fctr.All.X..rcv.glmnet.N
## CosSmlGR.mean 0.9734692
## CosSmlRB.mean 0.9734692
## lumB.mean.mean 0.5140038
## lumG.mad.mean 0.2750078
## lumG.mean.mean 0.4361654
## lumR.mad.mean 0.2889616
## resY.mad.nexp 1.0000000
## max.outdoor.fctr.All.X..rcv.glmnet.Y
## CosSmlGR.mean 0.9774375
## CosSmlRB.mean 0.9774375
## lumB.mean.mean 0.5464919
## lumG.mad.mean 0.3330659
## lumG.mean.mean 0.5108060
## lumR.mad.mean 0.3662894
## resY.mad.nexp 1.0000000
## min.outdoor.fctr.All.X..rcv.glmnet.N
## CosSmlGR.mean 7.730400e-01
## CosSmlRB.mean 7.730400e-01
## lumB.mean.mean 2.351705e-01
## lumG.mad.mean 7.599882e-02
## lumG.mean.mean 1.453933e-01
## lumR.mad.mean 1.437851e-01
## resY.mad.nexp 4.477805e-85
## min.outdoor.fctr.All.X..rcv.glmnet.Y
## CosSmlGR.mean 8.441096e-01
## CosSmlRB.mean 8.441096e-01
## lumB.mean.mean 2.374392e-01
## lumG.mad.mean 1.172004e-01
## lumG.mean.mean 2.011802e-01
## lumR.mad.mean 1.473628e-01
## resY.mad.nexp 3.268701e-81
## max.outdoor.fctr.Final..rcv.glmnet.Y
## CosSmlGR.mean 0.9832886
## CosSmlRB.mean 0.9832886
## lumB.mean.mean 0.6404673
## lumG.mad.mean 0.3604753
## lumG.mean.mean 0.5993138
## lumR.mad.mean 0.3837318
## resY.mad.nexp 1.0000000
## min.outdoor.fctr.Final..rcv.glmnet.Y
## CosSmlGR.mean 7.321306e-01
## CosSmlRB.mean 7.321306e-01
## lumB.mean.mean 1.977313e-01
## lumG.mad.mean 9.128165e-02
## lumG.mean.mean 1.422352e-01
## lumR.mad.mean 1.104682e-01
## resY.mad.nexp 8.904719e-122
## [1] "OOBobs outdoor.fctr.All.X..rcv.glmnet N: max > max of Train range: 7"
## business_id outdoor.fctr.All.X..rcv.glmnet CosSmlBG.mad CosSmlGR.mad
## 391 1634 N 0.008898536 0.02373338
## 662 2146 N 0.021227263 0.04738147
## 679 2185 N 0.010060156 0.02379174
## 1490 3706 N 0.010722310 0.02682689
## 478 180 N 0.023915287 0.05065459
## 1512 3748 N 0.006087318 0.01589641
## 1842 723 N 0.073493833 0.14100208
## CosSmlGR.mean CosSmlRB.mad CosSmlRB.mean lumR.mad.mad lumR.mean.mean
## 391 0.9516302 0.02373338 0.9516302 0.09913012 0.6507783
## 662 0.9022625 0.04738147 0.9022625 0.13792017 0.5110142
## 679 0.9532509 0.02379174 0.9532509 0.09482012 0.6527598
## 1490 0.9471822 0.02682689 0.9471822 0.19395024 0.5654659
## 478 0.9074390 0.05065459 0.9074390 0.18102023 0.5080014
## 1512 0.9734692 0.01589641 0.9734692 0.09051011 0.6223940
## 1842 0.7730400 0.14100208 0.7730400 0.07758010 0.4160745
## resY.mad resY.mad.log1p resY.mad.root2
## 391 62.2692 4.147399 7.891084
## 662 194.2206 5.274130 13.936305
## 679 0.0000 0.000000 0.000000
## 1490 88.2147 4.491046 9.392268
## 478 0.0000 0.000000 0.000000
## 1512 0.0000 0.000000 0.000000
## 1842 34.8411 3.579095 5.902635
## id cor.y exclude.as.feat cor.y.abs
## CosSmlBG.mad CosSmlBG.mad -0.046206836 FALSE 0.046206836
## CosSmlGR.mad CosSmlGR.mad -0.003587615 FALSE 0.003587615
## CosSmlGR.mean CosSmlGR.mean 0.021022718 FALSE 0.021022718
## CosSmlRB.mad CosSmlRB.mad -0.003587615 FALSE 0.003587615
## CosSmlRB.mean CosSmlRB.mean 0.021022718 FALSE 0.021022718
## lumR.mad.mad lumR.mad.mad 0.013705157 FALSE 0.013705157
## lumR.mean.mean lumR.mean.mean -0.115393376 FALSE 0.115393376
## resY.mad resY.mad 0.007630633 FALSE 0.007630633
## resY.mad.log1p resY.mad.log1p -0.001526058 FALSE 0.001526058
## resY.mad.root2 resY.mad.root2 0.002557583 FALSE 0.002557583
## cor.high.X freqRatio percentUnique zeroVar nzv
## CosSmlBG.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlGR.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlGR.mean CosSmlBG.mean 1.000000 99.70 FALSE FALSE
## CosSmlRB.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlRB.mean CosSmlGR.mean 1.000000 99.70 FALSE FALSE
## lumR.mad.mad <NA> 1.020576 2.30 FALSE FALSE
## lumR.mean.mean <NA> 1.000000 100.00 FALSE FALSE
## resY.mad <NA> 5.354497 9.05 FALSE FALSE
## resY.mad.log1p <NA> 5.354497 9.05 FALSE FALSE
## resY.mad.root2 <NA> 5.354497 9.05 FALSE FALSE
## is.cor.y.abs.low interaction.feat shapiro.test.p.value
## CosSmlBG.mad FALSE NA 7.694894e-38
## CosSmlGR.mad TRUE NA 1.304267e-33
## CosSmlGR.mean FALSE NA 9.960727e-29
## CosSmlRB.mad TRUE NA 1.304267e-33
## CosSmlRB.mean FALSE NA 9.960727e-29
## lumR.mad.mad FALSE NA 1.539005e-12
## lumR.mean.mean FALSE NA 4.578209e-07
## resY.mad TRUE NA 3.711302e-48
## resY.mad.log1p TRUE NA 3.133148e-49
## resY.mad.root2 TRUE NA 1.717662e-47
## rsp_var_raw id_var rsp_var max min
## CosSmlBG.mad FALSE NA NA 0.08290404 0.0000000
## CosSmlGR.mad FALSE NA NA 0.19718208 0.0000000
## CosSmlGR.mean FALSE NA NA 0.98328863 0.7321306
## CosSmlRB.mad FALSE NA NA 0.19718208 0.0000000
## CosSmlRB.mean FALSE NA NA 0.98328863 0.7321306
## lumR.mad.mad FALSE NA NA 0.29308037 0.0000000
## lumR.mean.mean FALSE NA NA 0.69867499 0.2684265
## resY.mad FALSE NA NA 278.72880000 0.0000000
## resY.mad.log1p FALSE NA NA 5.63382056 0.0000000
## resY.mad.root2 FALSE NA NA 16.69517296 0.0000000
## max.outdoor.fctr.N max.outdoor.fctr.Y min.outdoor.fctr.N
## CosSmlBG.mad 0.05922465 0.05333009 0.003496574
## CosSmlGR.mad 0.12141263 0.10218826 0.009921125
## CosSmlGR.mean 0.97271750 0.97040893 0.849202950
## CosSmlRB.mad 0.12141263 0.10218826 0.009921125
## CosSmlRB.mean 0.97271750 0.97040893 0.849202950
## lumR.mad.mad 0.17671022 0.20688026 0.025860032
## lumR.mean.mean 0.63289326 0.64409372 0.329859475
## resY.mad 185.32500000 185.32500000 0.000000000
## resY.mad.log1p 5.22749246 5.22749246 0.000000000
## resY.mad.root2 13.61341250 13.61341250 0.000000000
## min.outdoor.fctr.Y max.outdoor.fctr.All.X..rcv.glmnet.N
## CosSmlBG.mad 0.004274444 0.07349383
## CosSmlGR.mad 0.010443564 0.14100208
## CosSmlGR.mean 0.791394595 0.97346917
## CosSmlRB.mad 0.010443564 0.14100208
## CosSmlRB.mean 0.791394595 0.97346917
## lumR.mad.mad 0.012930016 0.19395024
## lumR.mean.mean 0.269941538 0.65275982
## resY.mad 0.000000000 194.22060000
## resY.mad.log1p 0.000000000 5.27413020
## resY.mad.root2 0.000000000 13.93630511
## max.outdoor.fctr.All.X..rcv.glmnet.Y
## CosSmlBG.mad 0.05448611
## CosSmlGR.mad 0.13184104
## CosSmlGR.mean 0.97743753
## CosSmlRB.mad 0.13184104
## CosSmlRB.mean 0.97743753
## lumR.mad.mad 0.18964024
## lumR.mean.mean 0.66918014
## resY.mad 185.32500000
## resY.mad.log1p 5.22749246
## resY.mad.root2 13.61341250
## min.outdoor.fctr.All.X..rcv.glmnet.N
## CosSmlBG.mad 0.005967801
## CosSmlGR.mad 0.015439896
## CosSmlGR.mean 0.773040009
## CosSmlRB.mad 0.015439896
## CosSmlRB.mean 0.773040009
## lumR.mad.mad 0.043100054
## lumR.mean.mean 0.416074523
## resY.mad 0.000000000
## resY.mad.log1p 0.000000000
## resY.mad.root2 0.000000000
## min.outdoor.fctr.All.X..rcv.glmnet.Y
## CosSmlBG.mad 0.002835269
## CosSmlGR.mad 0.005660636
## CosSmlGR.mean 0.844109620
## CosSmlRB.mad 0.005660636
## CosSmlRB.mean 0.844109620
## lumR.mad.mad 0.025860032
## lumR.mean.mean 0.297988719
## resY.mad 0.000000000
## resY.mad.log1p 0.000000000
## resY.mad.root2 0.000000000
## max.outdoor.fctr.Final..rcv.glmnet.Y
## CosSmlBG.mad 0.08290404
## CosSmlGR.mad 0.19718208
## CosSmlGR.mean 0.98328863
## CosSmlRB.mad 0.19718208
## CosSmlRB.mean 0.98328863
## lumR.mad.mad 0.29308037
## lumR.mean.mean 0.69867499
## resY.mad 278.72880000
## resY.mad.log1p 5.63382056
## resY.mad.root2 16.69517296
## min.outdoor.fctr.Final..rcv.glmnet.Y
## CosSmlBG.mad 0.0000000
## CosSmlGR.mad 0.0000000
## CosSmlGR.mean 0.7321306
## CosSmlRB.mad 0.0000000
## CosSmlRB.mean 0.7321306
## lumR.mad.mad 0.0000000
## lumR.mean.mean 0.2684265
## resY.mad 0.0000000
## resY.mad.log1p 0.0000000
## resY.mad.root2 0.0000000
## [1] "OOBobs total range outliers: 55"
## [1] "newobs outdoor.fctr.Final..rcv.glmnet Y: min < min of Train range: 111"
## business_id outdoor.fctr.Final..rcv.glmnet CorBG.mad CorBG.mean
## 2010 01i5j Y 0.02366618 0.9421127
## 2013 01pyb Y 0.01582174 0.9531754
## 2126 0fbnt Y 0.04947602 0.9361052
## 2222 0qj4g Y 0.01790504 0.9645024
## 2226 0re2p Y 0.05481874 0.9187171
## 2316 12p62 Y 0.00000000 0.9719078
## CorGR.mad CorGR.mean CorRB.mad CosSmlBG.mad CosSmlBG.mean
## 2010 0.05908480 0.8249196 0.029184688 0.005247007 0.9670846
## 2013 0.03762769 0.8469216 0.008792465 0.006724164 0.9807304
## 2126 0.09439177 0.8705689 0.022854570 0.008093206 0.9716331
## 2222 0.13290443 0.8437944 0.040944955 0.009897895 0.9856815
## 2226 0.16284073 0.7188341 0.058713951 0.020163209 0.9638564
## 2316 0.00000000 0.8533018 0.000000000 0.000000000 0.9895442
## CosSmlGR.mad CosSmlGR.mean CosSmlRB.mad CosSmlRB.mean lumB.mad.mad
## 2010 0.040086841 0.9043454 0.040086841 0.9043454 0.08189010
## 2013 0.019259221 0.9418881 0.019259221 0.9418881 0.04310005
## 2126 0.009113256 0.9415882 0.009113256 0.9415882 0.03448004
## 2222 0.015806328 0.9540358 0.015806328 0.9540358 0.12068015
## 2226 0.067291769 0.8981738 0.067291769 0.8981738 0.07758010
## 2316 0.000000000 0.9510148 0.000000000 0.9510148 0.00000000
## lumB.mad.mean lumB.mean.mad lumB.mean.mean lumG.mad.mad lumG.mean.mad
## 2010 0.1337247 0.11935204 0.2073095 0.05172006 0.07241045
## 2013 0.2919979 0.09494715 0.4914989 0.06034008 0.08261126
## 2126 0.2407045 0.07541078 0.3527358 0.08620011 0.06851804
## 2222 0.2664804 0.06015485 0.4518849 0.09051011 0.01857167
## 2226 0.2306267 0.02485645 0.3927527 0.09482012 0.15822878
## 2316 0.2732635 0.00000000 0.2774364 0.00000000 0.00000000
## lumG.mean.mean lumR.mad.mad lumR.mad.mean lumR.mean.mad
## 2010 0.1422352 0.06465008 0.1872146 0.10081428
## 2013 0.4396225 0.05172006 0.2448390 0.04961895
## 2126 0.3118877 0.08620011 0.2662866 0.12558911
## 2222 0.4086727 0.08620011 0.2451620 0.07187454
## 2226 0.3169907 0.12930016 0.2453558 0.12804094
## 2316 0.2460082 0.00000000 0.2907059 0.00000000
## lumR.mean.mean nImgs nImgs.log1p nImgs.root2 resX.mean
## 2010 0.2970919 10 2.3978953 3.162278 437.5000
## 2013 0.5596896 9 2.3025851 3.000000 444.4444
## 2126 0.4231807 5 1.7917595 2.236068 424.6000
## 2222 0.5190787 12 2.5649494 3.464102 382.1667
## 2226 0.4918638 15 2.7725887 3.872983 399.2000
## 2316 0.3222253 1 0.6931472 1.000000 500.0000
## resX.mean.log1p resX.mean.root2 resXY.mean resXY.mean.log1p
## 2010 6.083360 20.91650 180350.0 12.10266
## 2013 6.099073 21.08185 185666.7 12.13171
## 2126 6.053500 20.60582 187300.0 12.14047
## 2222 5.948470 19.54908 171250.5 12.05089
## 2226 5.991964 19.97999 177366.7 12.08598
## 2316 6.216606 22.36068 187500.0 12.14154
## resXY.mean.root2 resY.mad.nexp resY.mean resY.mean.log1p
## 2010 424.6763 5.717255e-41 423.2000 6.050205
## 2013 430.8906 1.325257e-13 426.8889 6.058864
## 2126 432.7817 1.000000e+00 450.0000 6.111467
## 2222 413.8242 1.000000e+00 414.3333 6.029081
## 2226 421.1492 1.000000e+00 455.5333 6.123662
## 2316 433.0127 1.000000e+00 375.0000 5.929589
## resY.mean.root2
## 2010 20.57183
## 2013 20.66129
## 2126 21.21320
## 2222 20.35518
## 2226 21.34323
## 2316 19.36492
## business_id outdoor.fctr.Final..rcv.glmnet CorBG.mad CorBG.mean
## 2363 18cak Y 0.04843046 0.9061263
## 3417 54133 Y 0.13146264 0.7793220
## 5840 dl740 Y 0.01005380 0.9583046
## 6344 fhuf5 Y 0.01571185 0.9577033
## 7822 kx8b6 Y 0.01445091 0.9303817
## 10239 tiww7 Y 0.02575783 0.9333274
## CorGR.mad CorGR.mean CorRB.mad CosSmlBG.mad CosSmlBG.mean
## 2363 0.18821333 0.7515425 0.05866856 0.017916291 0.9675051
## 3417 0.30087584 0.6055563 0.07870478 0.082904035 0.8910327
## 5840 0.07773633 0.8635691 0.01747529 0.002442151 0.9847750
## 6344 0.05463138 0.8846856 0.02859872 0.004864489 0.9849579
## 7822 0.07189722 0.7963182 0.03868710 0.002681409 0.9796477
## 10239 0.07733899 0.8707528 0.03750505 0.004767303 0.9758537
## CosSmlGR.mad CosSmlGR.mean CosSmlRB.mad CosSmlRB.mean lumB.mad.mad
## 2363 0.066494029 0.9095414 0.066494029 0.9095414 0.12068015
## 3417 0.139249278 0.8338463 0.139249278 0.8338463 0.08620011
## 5840 0.025246136 0.9464576 0.025246136 0.9464576 0.04741006
## 6344 0.008794331 0.9641203 0.008794331 0.9641203 0.14654018
## 7822 0.038346600 0.9405271 0.038346600 0.9405271 0.12068015
## 10239 0.018962089 0.9584892 0.018962089 0.9584892 0.06034008
## lumB.mad.mean lumB.mean.mad lumB.mean.mean lumG.mad.mad
## 2363 0.1383760 0.34373495 0.4928284 0.12930016
## 3417 0.2440511 0.08029972 0.4574431 0.10344013
## 5840 0.1976800 0.05882902 0.3643716 0.12068015
## 6344 0.2224238 0.13411873 0.5077292 0.13792017
## 7822 0.2093082 0.19133734 0.3499551 0.10344013
## 10239 0.2337275 0.01936594 0.3852980 0.01724002
## lumG.mean.mad lumG.mean.mean lumR.mad.mad lumR.mad.mean
## 2363 0.37736830 0.4627857 0.10344013 0.1228717
## 3417 0.14662326 0.2932435 0.08620011 0.2397969
## 5840 0.06762825 0.2966613 0.10344013 0.2194829
## 6344 0.14461062 0.4654995 0.15516019 0.2240463
## 7822 0.17690865 0.3012687 0.07758010 0.2375482
## 10239 0.07266650 0.3263748 0.04310005 0.2104711
## lumR.mean.mad lumR.mean.mean nImgs nImgs.log1p nImgs.root2 resX.mean
## 2363 0.36275226 0.5250151 15 2.772589 3.872983 343.2667
## 3417 0.12537705 0.5612452 41 3.737670 6.403124 397.1707
## 5840 0.03603546 0.4663652 4 1.609438 2.000000 374.0000
## 6344 0.12272477 0.5332039 43 3.784190 6.557439 468.4651
## 7822 0.21096230 0.4446801 7 2.079442 2.645751 410.7143
## 10239 0.02898572 0.4609971 5 1.791759 2.236068 425.0000
## resX.mean.log1p resX.mean.root2 resXY.mean resXY.mean.log1p
## 2363 5.841417 18.52746 122284.5 11.71411
## 3417 5.986881 19.92914 155952.2 11.95731
## 5840 5.926926 19.33908 187000.0 12.13887
## 6344 6.151594 21.64405 165744.2 12.01821
## 7822 6.020330 20.26609 187500.0 12.14154
## 10239 6.054439 20.61553 187300.0 12.14047
## resXY.mean.root2 resY.mad.nexp resY.mean resY.mean.log1p
## 2363 349.6920 2.635452e-50 341.2000 5.835395
## 3417 394.9078 3.857670e-36 398.4634 5.990122
## 5840 432.4350 1.000000e+00 500.0000 6.216606
## 6344 407.1169 1.988635e-37 363.0233 5.897218
## 7822 433.0127 1.000000e+00 464.2857 6.142652
## 10239 432.7817 1.000000e+00 449.6000 6.110580
## resY.mean.root2
## 2363 18.47160
## 3417 19.96155
## 5840 22.36068
## 6344 19.05317
## 7822 21.54729
## 10239 21.20377
## business_id outdoor.fctr.Final..rcv.glmnet CorBG.mad CorBG.mean
## 11485 y3dhm Y 0.03660985 0.9376582
## 11636 yo6u1 Y 0.09314190 0.9061871
## 11669 ysmbo Y 0.03464467 0.9017570
## 11901 zn28q Y 0.04265582 0.8947820
## 11906 znmff Y 0.02524109 0.9249660
## 11911 zo9zr Y 0.12017548 0.7305993
## CorGR.mad CorGR.mean CorRB.mad CosSmlBG.mad CosSmlBG.mean
## 11485 0.08265092 0.8602864 0.01503561 0.01415085 0.9767272
## 11636 0.20551589 0.7884210 0.05150626 0.02175515 0.9636961
## 11669 0.06555322 0.7706458 0.04068253 0.01828142 0.9436246
## 11901 0.15715612 0.7785503 0.10219450 0.03036623 0.9362634
## 11906 0.09141746 0.7956711 0.03463955 0.01455096 0.9742303
## 11911 0.19700372 0.5871819 0.03470658 0.05523727 0.8972047
## CosSmlGR.mad CosSmlGR.mean CosSmlRB.mad CosSmlRB.mean lumB.mad.mad
## 11485 0.01616932 0.9479147 0.01616932 0.9479147 0.02155003
## 11636 0.05426148 0.9334461 0.05426148 0.9334461 0.07758010
## 11669 0.09553192 0.8732856 0.09553192 0.8732856 0.06034008
## 11901 0.08451788 0.8623249 0.08451788 0.8623249 0.07758010
## 11906 0.03603518 0.9308919 0.03603518 0.9308919 0.10344013
## 11911 0.07780971 0.8501639 0.07780971 0.8501639 0.11206014
## lumB.mad.mean lumB.mean.mad lumB.mean.mean lumG.mad.mad
## 11485 0.2093082 0.07109822 0.3573475 0.01724002
## 11636 0.2458541 0.05741272 0.4112778 0.08620011
## 11669 0.2433624 0.02669663 0.3026559 0.11206014
## 11901 0.1046541 0.15250387 0.1977313 0.08189010
## 11906 0.2828397 0.04918105 0.3982522 0.06896009
## 11911 0.2526049 0.09391096 0.4164591 0.06896009
## lumG.mean.mad lumG.mean.mean lumR.mad.mad lumR.mad.mean
## 11485 0.06610516 0.3311871 0.043100054 0.2553367
## 11636 0.02257313 0.3267541 0.008620011 0.2765859
## 11669 0.03609714 0.2832583 0.077580097 0.2541600
## 11901 0.10714085 0.2135245 0.086200108 0.1664291
## 11906 0.08086176 0.3354951 0.068960087 0.3105423
## 11911 0.10093486 0.2540455 0.103440130 0.2609755
## lumR.mean.mad lumR.mean.mean nImgs nImgs.log1p nImgs.root2 resX.mean
## 11485 0.07390533 0.3997994 12 2.564949 3.464102 447.9167
## 11636 0.08854092 0.5127268 7 2.079442 2.645751 446.1429
## 11669 0.07662101 0.3700176 7 2.079442 2.645751 468.7143
## 11901 0.14169112 0.2684265 24 3.218876 4.898979 478.4583
## 11906 0.10353223 0.4704007 17 2.890372 4.123106 344.2941
## 11911 0.14590074 0.5399337 141 4.955827 11.874342 433.0071
## resX.mean.log1p resX.mean.root2 resXY.mean resXY.mean.log1p
## 11485 6.106837 21.16404 187333.3 12.14065
## 11636 6.102878 21.12209 195642.9 12.18405
## 11669 6.152125 21.64981 173928.6 12.06641
## 11901 6.172657 21.87369 192542.4 12.16808
## 11906 5.844397 18.55516 127450.6 11.75549
## 11911 6.073061 20.80882 185996.5 12.13349
## resXY.mean.root2 resY.mad.nexp resY.mean resY.mean.log1p
## 11485 432.8202 5.155016e-02 426.7500 6.058539
## 11636 442.3153 1.000000e+00 445.1429 6.100639
## 11669 417.0474 1.000000e+00 379.1429 5.940547
## 11901 438.7965 1.000000e+00 406.6250 6.010348
## 11906 357.0022 3.268701e-81 305.3529 5.724738
## 11911 431.2731 1.000000e+00 438.9858 6.086742
## resY.mean.root2
## 11485 20.65793
## 11636 21.09841
## 11669 19.47159
## 11901 20.16494
## 11906 17.47435
## 11911 20.95199
## id cor.y exclude.as.feat cor.y.abs
## CorBG.mad CorBG.mad 0.003604604 FALSE 0.003604604
## CorBG.mean CorBG.mean 0.016157691 FALSE 0.016157691
## CorGR.mad CorGR.mad 0.030253289 FALSE 0.030253289
## CorGR.mean CorGR.mean 0.004925319 FALSE 0.004925319
## CorRB.mad CorRB.mad 0.038198961 FALSE 0.038198961
## CosSmlBG.mad CosSmlBG.mad -0.046206836 FALSE 0.046206836
## CosSmlBG.mean CosSmlBG.mean 0.030023372 FALSE 0.030023372
## CosSmlGR.mad CosSmlGR.mad -0.003587615 FALSE 0.003587615
## CosSmlGR.mean CosSmlGR.mean 0.021022718 FALSE 0.021022718
## CosSmlRB.mad CosSmlRB.mad -0.003587615 FALSE 0.003587615
## CosSmlRB.mean CosSmlRB.mean 0.021022718 FALSE 0.021022718
## lumB.mad.mad lumB.mad.mad 0.038630626 FALSE 0.038630626
## lumB.mad.mean lumB.mad.mean 0.019323904 FALSE 0.019323904
## lumB.mean.mad lumB.mean.mad 0.049622224 FALSE 0.049622224
## lumB.mean.mean lumB.mean.mean -0.032529239 FALSE 0.032529239
## lumG.mad.mad lumG.mad.mad 0.069818982 FALSE 0.069818982
## lumG.mean.mad lumG.mean.mad 0.051616272 FALSE 0.051616272
## lumG.mean.mean lumG.mean.mean 0.052492718 FALSE 0.052492718
## lumR.mad.mad lumR.mad.mad 0.013705157 FALSE 0.013705157
## lumR.mad.mean lumR.mad.mean 0.036065638 FALSE 0.036065638
## lumR.mean.mad lumR.mean.mad 0.015642413 FALSE 0.015642413
## lumR.mean.mean lumR.mean.mean -0.115393376 FALSE 0.115393376
## nImgs nImgs -0.014963676 FALSE 0.014963676
## nImgs.log1p nImgs.log1p 0.047250893 FALSE 0.047250893
## nImgs.root2 nImgs.root2 0.014028124 FALSE 0.014028124
## resX.mean resX.mean -0.017726551 FALSE 0.017726551
## resX.mean.log1p resX.mean.log1p -0.015059015 FALSE 0.015059015
## resX.mean.root2 resX.mean.root2 -0.016434019 FALSE 0.016434019
## resXY.mean resXY.mean -0.009002880 FALSE 0.009002880
## resXY.mean.log1p resXY.mean.log1p -0.004867571 FALSE 0.004867571
## resXY.mean.root2 resXY.mean.root2 -0.007039955 FALSE 0.007039955
## resY.mad.nexp resY.mad.nexp 0.012190340 FALSE 0.012190340
## resY.mean resY.mean 0.012599188 FALSE 0.012599188
## resY.mean.log1p resY.mean.log1p 0.013625190 FALSE 0.013625190
## resY.mean.root2 resY.mean.root2 0.013106506 FALSE 0.013106506
## cor.high.X freqRatio percentUnique zeroVar nzv
## CorBG.mad <NA> 1.000000 100.00 FALSE FALSE
## CorBG.mean CosSmlBG.mean 1.000000 99.90 FALSE FALSE
## CorGR.mad <NA> 1.000000 100.00 FALSE FALSE
## CorGR.mean <NA> 2.000000 99.95 FALSE FALSE
## CorRB.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlBG.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlBG.mean CosSmlBG.mad 1.000000 99.70 FALSE FALSE
## CosSmlGR.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlGR.mean CosSmlBG.mean 1.000000 99.70 FALSE FALSE
## CosSmlRB.mad <NA> 1.000000 100.00 FALSE FALSE
## CosSmlRB.mean CosSmlGR.mean 1.000000 99.70 FALSE FALSE
## lumB.mad.mad <NA> 1.022624 2.75 FALSE FALSE
## lumB.mad.mean <NA> 1.200000 92.35 FALSE FALSE
## lumB.mean.mad lumG.mean.mad 1.000000 100.00 FALSE FALSE
## lumB.mean.mean lumG.mean.mean 2.000000 99.95 FALSE FALSE
## lumG.mad.mad <NA> 1.049261 2.40 FALSE FALSE
## lumG.mean.mad <NA> 2.000000 99.95 FALSE FALSE
## lumG.mean.mean <NA> 2.000000 99.95 FALSE FALSE
## lumR.mad.mad <NA> 1.020576 2.30 FALSE FALSE
## lumR.mad.mean <NA> 1.142857 93.95 FALSE FALSE
## lumR.mean.mad <NA> 2.000000 99.95 FALSE FALSE
## lumR.mean.mean <NA> 1.000000 100.00 FALSE FALSE
## nImgs <NA> 1.033333 19.10 FALSE FALSE
## nImgs.log1p nImgs.cut.fctr 1.033333 19.10 FALSE FALSE
## nImgs.root2 nImgs.log1p 1.033333 19.10 FALSE FALSE
## resX.mean <NA> 2.000000 97.75 FALSE FALSE
## resX.mean.log1p resX.mean 2.000000 97.60 FALSE FALSE
## resX.mean.root2 resX.mean 2.000000 97.45 FALSE FALSE
## resXY.mean <NA> 6.000000 98.55 FALSE FALSE
## resXY.mean.log1p <NA> 4.000000 90.80 FALSE FALSE
## resXY.mean.root2 <NA> 6.000000 98.20 FALSE FALSE
## resY.mad.nexp <NA> 5.354497 9.05 FALSE FALSE
## resY.mean resY.mean.root2 1.666667 98.15 FALSE FALSE
## resY.mean.log1p <NA> 1.666667 97.90 FALSE FALSE
## resY.mean.root2 resY.mean.log1p 1.666667 97.85 FALSE FALSE
## is.cor.y.abs.low interaction.feat shapiro.test.p.value
## CorBG.mad TRUE NA 1.066454e-34
## CorBG.mean FALSE NA 8.214498e-35
## CorGR.mad FALSE NA 7.392587e-28
## CorGR.mean TRUE NA 1.178044e-23
## CorRB.mad FALSE NA 2.592335e-42
## CosSmlBG.mad FALSE NA 7.694894e-38
## CosSmlBG.mean FALSE NA 1.504448e-37
## CosSmlGR.mad TRUE NA 1.304267e-33
## CosSmlGR.mean FALSE NA 9.960727e-29
## CosSmlRB.mad TRUE NA 1.304267e-33
## CosSmlRB.mean FALSE NA 9.960727e-29
## lumB.mad.mad FALSE NA 5.263514e-19
## lumB.mad.mean FALSE NA 2.584492e-12
## lumB.mean.mad FALSE NA 1.492315e-16
## lumB.mean.mean FALSE NA 2.980872e-09
## lumG.mad.mad FALSE NA 1.047894e-17
## lumG.mean.mad FALSE NA 6.299586e-18
## lumG.mean.mean FALSE NA 1.000052e-02
## lumR.mad.mad FALSE NA 1.539005e-12
## lumR.mad.mean FALSE NA 5.440725e-06
## lumR.mean.mad FALSE NA 3.610555e-14
## lumR.mean.mean FALSE NA 4.578209e-07
## nImgs FALSE NA 1.364097e-61
## nImgs.log1p FALSE NA 1.234907e-13
## nImgs.root2 FALSE NA 4.118632e-46
## resX.mean FALSE NA 1.161337e-19
## resX.mean.log1p FALSE NA 2.973500e-25
## resX.mean.root2 FALSE NA 1.959497e-22
## resXY.mean FALSE NA 2.964553e-36
## resXY.mean.log1p TRUE NA 6.980019e-43
## resXY.mean.root2 TRUE NA 1.780045e-39
## resY.mad.nexp FALSE NA 1.839563e-53
## resY.mean FALSE NA 1.464051e-21
## resY.mean.log1p FALSE NA 3.854130e-28
## resY.mean.root2 FALSE NA 7.216658e-25
## rsp_var_raw id_var rsp_var max min
## CorBG.mad FALSE NA NA 2.243888e-01 0.000000e+00
## CorBG.mean FALSE NA NA 9.873230e-01 3.973767e-01
## CorGR.mad FALSE NA NA 4.591960e-01 0.000000e+00
## CorGR.mean FALSE NA NA 9.563650e-01 9.720700e-02
## CorRB.mad FALSE NA NA 2.867121e-01 0.000000e+00
## CosSmlBG.mad FALSE NA NA 8.290404e-02 0.000000e+00
## CosSmlBG.mean FALSE NA NA 9.952656e-01 7.560239e-01
## CosSmlGR.mad FALSE NA NA 1.971821e-01 0.000000e+00
## CosSmlGR.mean FALSE NA NA 9.832886e-01 7.321306e-01
## CosSmlRB.mad FALSE NA NA 1.971821e-01 0.000000e+00
## CosSmlRB.mean FALSE NA NA 9.832886e-01 7.321306e-01
## lumB.mad.mad FALSE NA NA 2.672203e-01 0.000000e+00
## lumB.mad.mean FALSE NA NA 3.621365e-01 1.046541e-01
## lumB.mean.mad FALSE NA NA 3.437349e-01 0.000000e+00
## lumB.mean.mean FALSE NA NA 6.404673e-01 1.977313e-01
## lumG.mad.mad FALSE NA NA 3.017004e-01 0.000000e+00
## lumG.mean.mad FALSE NA NA 3.773683e-01 0.000000e+00
## lumG.mean.mean FALSE NA NA 5.993138e-01 1.422352e-01
## lumR.mad.mad FALSE NA NA 2.930804e-01 0.000000e+00
## lumR.mad.mean FALSE NA NA 3.837318e-01 1.104682e-01
## lumR.mean.mad FALSE NA NA 3.627523e-01 0.000000e+00
## lumR.mean.mean FALSE NA NA 6.986750e-01 2.684265e-01
## nImgs FALSE NA NA 2.974000e+03 1.000000e+00
## nImgs.log1p FALSE NA NA 7.997999e+00 6.931472e-01
## nImgs.root2 FALSE NA NA 5.453439e+01 1.000000e+00
## resX.mean FALSE NA NA 5.000000e+02 2.837692e+02
## resX.mean.log1p FALSE NA NA 6.216606e+00 5.651679e+00
## resX.mean.root2 FALSE NA NA 2.236068e+01 1.684545e+01
## resXY.mean FALSE NA NA 2.500000e+05 8.762615e+04
## resXY.mean.log1p FALSE NA NA 1.242922e+01 1.138085e+01
## resXY.mean.root2 FALSE NA NA 5.000000e+02 2.960172e+02
## resY.mad.nexp FALSE NA NA 1.000000e+00 8.904719e-122
## resY.mean FALSE NA NA 5.000000e+02 2.530000e+02
## resY.mean.log1p FALSE NA NA 6.216606e+00 5.537334e+00
## resY.mean.root2 FALSE NA NA 2.236068e+01 1.590597e+01
## max.outdoor.fctr.N max.outdoor.fctr.Y min.outdoor.fctr.N
## CorBG.mad 1.255981e-01 1.740408e-01 7.895506e-03
## CorBG.mean 9.754135e-01 9.763031e-01 6.990869e-01
## CorGR.mad 2.865703e-01 2.998611e-01 2.569433e-03
## CorGR.mean 9.143933e-01 9.168114e-01 5.526847e-01
## CorRB.mad 1.707495e-01 2.867121e-01 7.217628e-03
## CosSmlBG.mad 5.922465e-02 7.349383e-02 3.181644e-03
## CosSmlBG.mean 9.919500e-01 9.919660e-01 8.793939e-01
## CosSmlGR.mad 1.262114e-01 1.410021e-01 5.660636e-03
## CosSmlGR.mean 9.748654e-01 9.774375e-01 8.416038e-01
## CosSmlRB.mad 1.262114e-01 1.410021e-01 5.660636e-03
## CosSmlRB.mean 9.748654e-01 9.774375e-01 8.416038e-01
## lumB.mad.mad 2.672203e-01 2.068803e-01 8.620011e-03
## lumB.mad.mean 3.621365e-01 3.130679e-01 1.094111e-01
## lumB.mean.mad 2.318285e-01 3.251330e-01 2.497502e-02
## lumB.mean.mean 5.616800e-01 5.887858e-01 2.374392e-01
## lumG.mad.mad 2.068803e-01 3.017004e-01 2.586003e-02
## lumG.mean.mad 2.696606e-01 3.267359e-01 1.148923e-02
## lumG.mean.mean 5.253529e-01 5.311267e-01 1.817476e-01
## lumR.mad.mad 1.939502e-01 2.068803e-01 2.586003e-02
## lumR.mad.mean 3.662894e-01 3.166458e-01 1.437851e-01
## lumR.mean.mad 2.805100e-01 2.855381e-01 3.823550e-03
## lumR.mean.mean 6.691801e-01 6.617843e-01 2.979887e-01
## nImgs 2.974000e+03 1.954000e+03 2.000000e+00
## nImgs.log1p 7.997999e+00 7.578145e+00 1.098612e+00
## nImgs.root2 5.453439e+01 4.420407e+01 1.414214e+00
## resX.mean 5.000000e+02 5.000000e+02 2.837692e+02
## resX.mean.log1p 6.216606e+00 6.216606e+00 5.651679e+00
## resX.mean.root2 2.236068e+01 2.236068e+01 1.684545e+01
## resXY.mean 2.175740e+05 2.182778e+05 8.762615e+04
## resXY.mean.log1p 1.229030e+01 1.229353e+01 1.138085e+01
## resXY.mean.root2 4.664483e+02 4.672021e+02 2.960172e+02
## resY.mad.nexp 1.000000e+00 1.000000e+00 4.477805e-85
## resY.mean 4.861111e+02 5.000000e+02 2.530000e+02
## resY.mean.log1p 6.188492e+00 6.216606e+00 5.537334e+00
## resY.mean.root2 2.204793e+01 2.236068e+01 1.590597e+01
## min.outdoor.fctr.Y max.outdoor.fctr.All.X..rcv.glmnet.N
## CorBG.mad 1.192589e-03 9.640539e-02
## CorBG.mean 7.976347e-01 9.638528e-01
## CorGR.mad 3.041371e-02 2.206953e-01
## CorGR.mean 6.038797e-01 8.924409e-01
## CorRB.mad 8.978361e-03 8.402239e-02
## CosSmlBG.mad 2.835269e-03 7.349383e-02
## CosSmlBG.mean 8.990240e-01 9.900264e-01
## CosSmlGR.mad 9.706923e-03 1.410021e-01
## CosSmlGR.mean 7.730400e-01 9.734692e-01
## CosSmlRB.mad 9.706923e-03 1.410021e-01
## CosSmlRB.mean 7.730400e-01 9.734692e-01
## lumB.mad.mad 2.586003e-02 1.896402e-01
## lumB.mad.mean 1.217545e-01 3.021552e-01
## lumB.mean.mad 2.948638e-02 2.000383e-01
## lumB.mean.mean 2.302221e-01 5.140038e-01
## lumG.mad.mad 1.724002e-02 1.680902e-01
## lumG.mean.mad 2.485611e-02 1.858523e-01
## lumG.mean.mean 1.453933e-01 4.361654e-01
## lumR.mad.mad 1.293002e-02 1.939502e-01
## lumR.mad.mean 1.411135e-01 2.889616e-01
## lumR.mean.mad 2.119836e-02 2.472881e-01
## lumR.mean.mean 2.699415e-01 6.527598e-01
## nImgs 2.000000e+00 2.020000e+02
## nImgs.log1p 1.098612e+00 5.313206e+00
## nImgs.root2 1.414214e+00 1.421267e+01
## resX.mean 3.475000e+02 4.687500e+02
## resX.mean.log1p 5.853638e+00 6.152201e+00
## resX.mean.root2 1.864135e+01 2.165064e+01
## resXY.mean 1.137250e+05 2.010079e+05
## resXY.mean.log1p 1.164155e+01 1.221110e+01
## resXY.mean.root2 3.372314e+02 4.483391e+02
## resY.mad.nexp 3.268701e-81 1.000000e+00
## resY.mean 2.915000e+02 4.758500e+02
## resY.mean.log1p 5.678465e+00 6.167202e+00
## resY.mean.root2 1.707337e+01 2.181399e+01
## max.outdoor.fctr.All.X..rcv.glmnet.Y
## CorBG.mad 1.135332e-01
## CorBG.mean 9.763031e-01
## CorGR.mad 2.998611e-01
## CorGR.mean 9.159928e-01
## CorRB.mad 2.867121e-01
## CosSmlBG.mad 5.448611e-02
## CosSmlBG.mean 9.919660e-01
## CosSmlGR.mad 1.318410e-01
## CosSmlGR.mean 9.774375e-01
## CosSmlRB.mad 1.318410e-01
## CosSmlRB.mean 9.774375e-01
## lumB.mad.mad 2.068803e-01
## lumB.mad.mean 3.338965e-01
## lumB.mean.mad 2.318285e-01
## lumB.mean.mean 5.464919e-01
## lumG.mad.mad 2.284303e-01
## lumG.mean.mad 2.446775e-01
## lumG.mean.mean 5.108060e-01
## lumR.mad.mad 1.896402e-01
## lumR.mad.mean 3.662894e-01
## lumR.mean.mad 2.396022e-01
## lumR.mean.mean 6.691801e-01
## nImgs 2.239000e+03
## nImgs.log1p 7.714231e+00
## nImgs.root2 4.731807e+01
## resX.mean 5.000000e+02
## resX.mean.log1p 6.216606e+00
## resX.mean.root2 2.236068e+01
## resXY.mean 2.182778e+05
## resXY.mean.log1p 1.229353e+01
## resXY.mean.root2 4.672021e+02
## resY.mad.nexp 1.000000e+00
## resY.mean 4.861111e+02
## resY.mean.log1p 6.188492e+00
## resY.mean.root2 2.204793e+01
## min.outdoor.fctr.All.X..rcv.glmnet.N
## CorBG.mad 1.744157e-02
## CorBG.mean 8.254616e-01
## CorGR.mad 3.270109e-02
## CorGR.mean 6.682450e-01
## CorRB.mad 1.112447e-02
## CosSmlBG.mad 5.967801e-03
## CosSmlBG.mean 9.096928e-01
## CosSmlGR.mad 1.543990e-02
## CosSmlGR.mean 7.730400e-01
## CosSmlRB.mad 1.543990e-02
## CosSmlRB.mean 7.730400e-01
## lumB.mad.mad 2.586003e-02
## lumB.mad.mean 1.220965e-01
## lumB.mean.mad 5.342117e-02
## lumB.mean.mean 2.351705e-01
## lumG.mad.mad 2.586003e-02
## lumG.mean.mad 3.819614e-02
## lumG.mean.mean 1.453933e-01
## lumR.mad.mad 4.310005e-02
## lumR.mad.mean 1.437851e-01
## lumR.mean.mad 3.556016e-02
## lumR.mean.mean 4.160745e-01
## nImgs 8.000000e+00
## nImgs.log1p 2.197225e+00
## nImgs.root2 2.828427e+00
## resX.mean 3.837000e+02
## resX.mean.log1p 5.952464e+00
## resX.mean.root2 1.958826e+01
## resXY.mean 1.445588e+05
## resXY.mean.log1p 1.188145e+01
## resXY.mean.root2 3.802089e+02
## resY.mad.nexp 4.477805e-85
## resY.mean 3.355882e+02
## resY.mean.log1p 5.818860e+00
## resY.mean.root2 1.831907e+01
## min.outdoor.fctr.All.X..rcv.glmnet.Y
## CorBG.mad 8.948235e-03
## CorBG.mean 6.990869e-01
## CorGR.mad 2.569433e-03
## CorGR.mean 5.722977e-01
## CorRB.mad 9.440068e-03
## CosSmlBG.mad 2.835269e-03
## CosSmlBG.mean 8.793939e-01
## CosSmlGR.mad 5.660636e-03
## CosSmlGR.mean 8.441096e-01
## CosSmlRB.mad 5.660636e-03
## CosSmlRB.mean 8.441096e-01
## lumB.mad.mad 2.586003e-02
## lumB.mad.mean 1.367933e-01
## lumB.mean.mad 2.497502e-02
## lumB.mean.mean 2.374392e-01
## lumG.mad.mad 1.724002e-02
## lumG.mean.mad 1.148923e-02
## lumG.mean.mean 2.011802e-01
## lumR.mad.mad 2.586003e-02
## lumR.mad.mean 1.473628e-01
## lumR.mean.mad 3.823550e-03
## lumR.mean.mean 2.979887e-01
## nImgs 2.000000e+00
## nImgs.log1p 1.098612e+00
## nImgs.root2 1.414214e+00
## resX.mean 3.683684e+02
## resX.mean.log1p 5.911795e+00
## resX.mean.root2 1.919293e+01
## resXY.mean 1.453448e+05
## resXY.mean.log1p 1.188687e+01
## resXY.mean.root2 3.812411e+02
## resY.mad.nexp 3.268701e-81
## resY.mean 3.114478e+02
## resY.mean.log1p 5.744437e+00
## resY.mean.root2 1.764788e+01
## max.outdoor.fctr.Final..rcv.glmnet.Y
## CorBG.mad 2.243888e-01
## CorBG.mean 9.873230e-01
## CorGR.mad 4.591960e-01
## CorGR.mean 9.563650e-01
## CorRB.mad 2.375609e-01
## CosSmlBG.mad 8.290404e-02
## CosSmlBG.mean 9.952656e-01
## CosSmlGR.mad 1.971821e-01
## CosSmlGR.mean 9.832886e-01
## CosSmlRB.mad 1.971821e-01
## CosSmlRB.mean 9.832886e-01
## lumB.mad.mad 2.629103e-01
## lumB.mad.mean 3.461636e-01
## lumB.mean.mad 3.437349e-01
## lumB.mean.mean 6.404673e-01
## lumG.mad.mad 2.758403e-01
## lumG.mean.mad 3.773683e-01
## lumG.mean.mean 5.993138e-01
## lumR.mad.mad 2.930804e-01
## lumR.mad.mean 3.837318e-01
## lumR.mean.mad 3.627523e-01
## lumR.mean.mean 6.986750e-01
## nImgs 2.825000e+03
## nImgs.log1p 7.946618e+00
## nImgs.root2 5.315073e+01
## resX.mean 5.000000e+02
## resX.mean.log1p 6.216606e+00
## resX.mean.root2 2.236068e+01
## resXY.mean 2.500000e+05
## resXY.mean.log1p 1.242922e+01
## resXY.mean.root2 5.000000e+02
## resY.mad.nexp 1.000000e+00
## resY.mean 5.000000e+02
## resY.mean.log1p 6.216606e+00
## resY.mean.root2 2.236068e+01
## min.outdoor.fctr.Final..rcv.glmnet.Y
## CorBG.mad 0.000000e+00
## CorBG.mean 3.973767e-01
## CorGR.mad 0.000000e+00
## CorGR.mean 9.720700e-02
## CorRB.mad 0.000000e+00
## CosSmlBG.mad 0.000000e+00
## CosSmlBG.mean 7.560239e-01
## CosSmlGR.mad 0.000000e+00
## CosSmlGR.mean 7.321306e-01
## CosSmlRB.mad 0.000000e+00
## CosSmlRB.mean 7.321306e-01
## lumB.mad.mad 0.000000e+00
## lumB.mad.mean 1.046541e-01
## lumB.mean.mad 0.000000e+00
## lumB.mean.mean 1.977313e-01
## lumG.mad.mad 0.000000e+00
## lumG.mean.mad 0.000000e+00
## lumG.mean.mean 1.422352e-01
## lumR.mad.mad 0.000000e+00
## lumR.mad.mean 1.104682e-01
## lumR.mean.mad 0.000000e+00
## lumR.mean.mean 2.684265e-01
## nImgs 1.000000e+00
## nImgs.log1p 6.931472e-01
## nImgs.root2 1.000000e+00
## resX.mean 3.045000e+02
## resX.mean.log1p 5.721950e+00
## resX.mean.root2 1.744993e+01
## resXY.mean 1.058460e+05
## resXY.mean.log1p 1.156975e+01
## resXY.mean.root2 3.253398e+02
## resY.mad.nexp 8.904719e-122
## resY.mean 2.670000e+02
## resY.mean.log1p 5.590987e+00
## resY.mean.root2 1.634013e+01
## [1] "newobs outdoor.fctr.Final..rcv.glmnet Y: max > max of Train range: 10000"
## business_id outdoor.fctr.Final..rcv.glmnet .pos CorBG.mad CorBG.mean
## 2001 003sg Y 2001 0.03723268 0.9366112
## 2002 00er5 Y 2002 0.03533531 0.9389177
## 2003 00kad Y 2003 0.03863244 0.9306837
## 2004 00mc6 Y 2004 0.02866452 0.9567892
## 2005 00q7x Y 2005 0.04471961 0.9154456
## 2006 00v0t Y 2006 0.03440394 0.9308481
## CorGR.mad CorGR.mean CorRB.mean CosSmlBG.mad CosSmlBG.mean
## 2001 0.09445512 0.8225389 0.9234642 0.015259026 0.9753027
## 2002 0.10907080 0.8300391 0.9273531 0.013282678 0.9771841
## 2003 0.11372265 0.8023874 0.9182336 0.015996107 0.9698392
## 2004 0.07665867 0.8669218 0.9234340 0.006016074 0.9870320
## 2005 0.11942459 0.8074720 0.9438535 0.020119313 0.9667608
## 2006 0.08337055 0.8182460 0.9226490 0.012121655 0.9734977
## CosSmlGR.mad CosSmlGR.mean CosSmlRB.mad CosSmlRB.mean lumB.mad.mad
## 2001 0.03674287 0.9380160 0.03674287 0.9380160 0.12068015
## 2002 0.03258809 0.9392338 0.03258809 0.9392338 0.09482012
## 2003 0.03936943 0.9191075 0.03936943 0.9191075 0.08620011
## 2004 0.01058266 0.9652022 0.01058266 0.9652022 0.12930016
## 2005 0.04959311 0.9343829 0.04959311 0.9343829 0.09913012
## 2006 0.02881393 0.9444844 0.02881393 0.9444844 0.06465008
## lumB.mad.mean lumB.mean.mad lumB.mean.mean lumG.mad.mean
## 2001 0.2452722 0.12870733 0.4319603 0.2306847
## 2002 0.2396801 0.12587582 0.4072438 0.2180571
## 2003 0.2284318 0.14216707 0.4185144 0.1997114
## 2004 0.2469062 0.08988226 0.3974832 0.2244249
## 2005 0.2168181 0.14363447 0.4238079 0.2034941
## 2006 0.2376521 0.19723244 0.4272948 0.2211787
## lumG.mean.mad lumG.mean.mean lumR.mad.mad lumR.mad.mean lumR.mean.mad
## 2001 0.1333716 0.3687703 0.09482012 0.2440537 0.1438853
## 2002 0.1255282 0.3492009 0.11206014 0.2423380 0.1504310
## 2003 0.1178874 0.3399648 0.09482012 0.2308835 0.1669751
## 2004 0.1885424 0.3597017 0.12930016 0.2550460 0.1605529
## 2005 0.1454250 0.3517850 0.09051011 0.2039786 0.1773963
## 2006 0.1035705 0.3522046 0.08189010 0.2306267 0.1772157
## lumR.mean.mean nImgs nImgs.log1p nImgs.nexp nImgs.root2
## 2001 0.5010181 167 5.123964 2.970445e-73 12.922848
## 2002 0.4816302 210 5.351858 6.282881e-92 14.491377
## 2003 0.5022230 83 4.430817 8.985826e-37 9.110434
## 2004 0.5002178 15 2.772589 3.059023e-07 3.872983
## 2005 0.4855456 24 3.218876 3.775135e-11 4.898979
## 2006 0.4932310 24 3.218876 3.775135e-11 4.898979
## resX.mean.nexp resXY.mad resXY.mad.log1p resXY.mad.root2 resXY.mean
## 2001 3.225211e-191 0 0 0 185382.6
## 2002 2.758174e-188 0 0 0 181563.1
## 2003 5.050852e-187 0 0 0 182564.8
## 2004 6.231837e-192 0 0 0 191000.0
## 2005 2.589612e-194 0 0 0 190645.8
## 2006 3.224442e-195 0 0 0 188770.8
## resXY.mean.log1p resXY.mean.root2 resY.mad resY.mad.log1p
## 2001 12.13018 430.5608 0.0000 0.000000
## 2002 12.10936 426.1022 0.0000 0.000000
## 2003 12.11487 427.2760 0.0000 0.000000
## 2004 12.16003 437.0355 0.0000 0.000000
## 2005 12.15818 436.6301 92.6625 4.539698
## 2006 12.14829 434.4777 92.6625 4.539698
## resY.mad.root2 resY.mean.nexp
## 2001 0.000000 4.335495e-188
## 2002 0.000000 8.526171e-188
## 2003 0.000000 8.016718e-190
## 2004 0.000000 1.437642e-192
## 2005 9.626136 7.025181e-190
## 2006 9.626136 2.399065e-187
## business_id outdoor.fctr.Final..rcv.glmnet .pos CorBG.mad
## 3111 4051c Y 3111 0.02872110
## 5473 cb1v8 Y 5473 0.06250771
## 6175 ewq0g Y 6175 0.04002702
## 8809 oftao Y 8809 0.02403192
## 9248 pzr3p Y 9248 0.04163603
## 11902 zn3oa Y 11902 0.06239006
## CorBG.mean CorGR.mad CorGR.mean CorRB.mean CosSmlBG.mad
## 3111 0.9401452 0.10031738 0.8278789 0.9297429 0.013574627
## 5473 0.9253915 0.15170834 0.7945024 0.9282731 0.017213009
## 6175 0.9289115 0.11326076 0.8104939 0.9164378 0.017328557
## 8809 0.9469657 0.08951274 0.8454349 0.9352056 0.009328001
## 9248 0.9314986 0.11284603 0.8079982 0.9151733 0.015961113
## 11902 0.8837188 0.14630719 0.7418329 0.8975160 0.035299314
## CosSmlBG.mean CosSmlGR.mad CosSmlGR.mean CosSmlRB.mad CosSmlRB.mean
## 3111 0.9773881 0.03185742 0.9393367 0.03185742 0.9393367
## 5473 0.9721751 0.05255970 0.9322791 0.05255970 0.9322791
## 6175 0.9700440 0.04114004 0.9270431 0.04114004 0.9270431
## 8809 0.9768335 0.03753497 0.9382425 0.03753497 0.9382425
## 9248 0.9730300 0.04118877 0.9337317 0.04118877 0.9337317
## 11902 0.9423003 0.07168470 0.8780887 0.07168470 0.8780887
## lumB.mad.mad lumB.mad.mean lumB.mean.mad lumB.mean.mean
## 3111 0.09482012 0.2386648 0.1418353 0.4169483
## 5473 0.07973510 0.2247840 0.1322573 0.4475920
## 6175 0.10775014 0.2228411 0.1378717 0.3782070
## 8809 0.12930016 0.2507338 0.1344458 0.3955917
## 9248 0.08620011 0.2395982 0.1230191 0.4212239
## 11902 0.08189010 0.2084776 0.1046371 0.3012358
## lumG.mad.mean lumG.mean.mad lumG.mean.mean lumR.mad.mad
## 3111 0.2206505 0.1288117 0.3591182 0.10344013
## 5473 0.2031521 0.1357753 0.3720498 0.09482012
## 6175 0.1997851 0.1135518 0.3291324 0.08620011
## 8809 0.2145773 0.1248811 0.3173069 0.09913012
## 9248 0.2273359 0.1186928 0.3546633 0.08620011
## 11902 0.1312329 0.1032154 0.2158150 0.09482012
## lumR.mad.mean lumR.mean.mad lumR.mean.mean nImgs nImgs.log1p
## 3111 0.2330413 0.1488915 0.4935259 61 4.127134
## 5473 0.2255536 0.1720477 0.5236533 34 3.555348
## 6175 0.2227409 0.1230461 0.4669705 58 4.077537
## 8809 0.2558212 0.1096981 0.4981574 32 3.496508
## 9248 0.2330720 0.1365547 0.5041447 298 5.700444
## 11902 0.2849956 0.1155834 0.4402610 56 4.043051
## nImgs.nexp nImgs.root2 resX.mean.nexp resXY.mad resXY.mad.log1p
## 3111 3.221340e-27 7.810250 1.615271e-195 0.0 0.000000
## 5473 1.713908e-15 5.830952 7.900500e-192 741.3 6.609753
## 6175 6.470235e-26 7.615773 5.356061e-197 0.0 0.000000
## 8809 1.266417e-14 5.656854 2.694379e-190 0.0 0.000000
## 9248 3.804034e-130 17.262677 9.404655e-193 0.0 0.000000
## 11902 4.780893e-25 7.483315 5.059170e-203 0.0 0.000000
## resXY.mad.root2 resXY.mean resXY.mean.log1p resXY.mean.root2
## 3111 0.00000 186573.8 12.13659 431.9419
## 5473 27.22683 186521.8 12.13631 431.8817
## 6175 0.00000 186617.2 12.13682 431.9922
## 8809 0.00000 182203.1 12.11288 426.8526
## 9248 0.00000 183746.0 12.12131 428.6560
## 11902 0.00000 190526.8 12.15755 436.4937
## resY.mad resY.mad.log1p resY.mad.root2 resY.mean.nexp
## 3111 139.3644 4.944242 11.805270 3.877891e-185
## 5473 11.1195 2.494816 3.334591 5.548548e-185
## 6175 114.1602 4.746324 10.684578 5.383921e-183
## 8809 92.6625 4.539698 9.626136 1.454377e-186
## 9248 139.3644 4.944242 11.805270 2.615522e-185
## 11902 0.0000 0.000000 0.000000 4.562642e-181
## business_id outdoor.fctr.Final..rcv.glmnet .pos CorBG.mad
## 11995 zyrif Y 11995 0.04093565
## 11996 zyvg6 Y 11996 0.02920210
## 11997 zyvjj Y 11997 0.05992721
## 11998 zz8g4 Y 11998 0.03775119
## 11999 zzxkg Y 11999 0.03390739
## 12000 zzxwm Y 12000 0.06401419
## CorBG.mean CorGR.mad CorGR.mean CorRB.mean CosSmlBG.mad
## 11995 0.9256048 0.08376190 0.8309741 0.9234027 0.01168123
## 11996 0.9438330 0.05111297 0.8292495 0.9109043 0.01091818
## 11997 0.8961274 0.11638684 0.7407158 0.8991711 0.02048514
## 11998 0.9302153 0.08587094 0.8233839 0.9309808 0.01691021
## 11999 0.9330440 0.12244687 0.8163253 0.9220607 0.01573925
## 12000 0.9344287 0.14104179 0.8443600 0.9278260 0.02338239
## CosSmlBG.mean CosSmlGR.mad CosSmlGR.mean CosSmlRB.mad CosSmlRB.mean
## 11995 0.9741601 0.03064421 0.9478580 0.03064421 0.9478580
## 11996 0.9820475 0.03243603 0.9517401 0.03243603 0.9517401
## 11997 0.9555059 0.05113108 0.9121136 0.05113108 0.9121136
## 11998 0.9687239 0.03741167 0.9333981 0.03741167 0.9333981
## 11999 0.9728899 0.04679900 0.9294468 0.04679900 0.9294468
## 12000 0.9761018 0.05131306 0.9373979 0.05131306 0.9373979
## lumB.mad.mad lumB.mad.mean lumB.mean.mad lumB.mean.mean
## 11995 0.09482012 0.2404693 0.08057952 0.4589377
## 11996 0.07327009 0.2038575 0.09177765 0.4499935
## 11997 0.11206014 0.2454850 0.10525375 0.4544731
## 11998 0.09913012 0.2531112 0.13684456 0.4162767
## 11999 0.11206014 0.2540845 0.12439807 0.4034418
## 12000 0.17240022 0.2325647 0.16271547 0.3768968
## lumG.mad.mean lumG.mean.mad lumG.mean.mean lumR.mad.mad
## 11995 0.2417105 0.10813525 0.3960581 0.09482012
## 11996 0.2020406 0.11370989 0.3710309 0.12499016
## 11997 0.2512991 0.09570397 0.3633369 0.12930016
## 11998 0.2331067 0.11953365 0.3384051 0.10344013
## 11999 0.2281852 0.13179104 0.3399353 0.09913012
## 12000 0.1941021 0.14659912 0.3320116 0.08620011
## lumR.mad.mean lumR.mean.mad lumR.mean.mean nImgs nImgs.log1p
## 11995 0.2353738 0.1008384 0.5176209 89 4.499810
## 11996 0.2147590 0.1272109 0.5513465 16 2.833213
## 11997 0.2603433 0.1918240 0.5363046 27 3.332205
## 11998 0.2412366 0.1619514 0.4996036 118 4.779123
## 11999 0.2495351 0.1469719 0.4861776 154 5.043425
## 12000 0.2285395 0.2050777 0.4525386 13 2.639057
## nImgs.nexp nImgs.root2 resX.mean.nexp resXY.mad resXY.mad.log1p
## 11995 2.227364e-39 9.433981 2.552892e-180 1482.6 7.302227
## 11996 1.125352e-07 4.000000 4.256714e-188 0.0 0.000000
## 11997 1.879529e-12 5.196152 1.115844e-200 1482.6 7.302227
## 11998 5.665668e-52 10.862780 5.049362e-193 0.0 0.000000
## 11999 1.314165e-67 12.409674 1.561603e-190 0.0 0.000000
## 12000 2.260329e-06 3.605551 8.694857e-179 1482.6 7.302227
## resXY.mad.root2 resXY.mean resXY.mean.log1p resXY.mean.root2
## 11995 38.50455 175542.2 12.07564 418.9776
## 11996 0.00000 188375.0 12.14620 434.0219
## 11997 38.50455 179504.8 12.09796 423.6801
## 11998 0.00000 183586.2 12.12045 428.4697
## 11999 0.00000 182534.7 12.11470 427.2408
## 12000 38.50455 161115.2 11.98988 401.3916
## resY.mad resY.mad.log1p resY.mad.root2 resY.mean.nexp
## 11995 0.0000 0.000000 0.000000 1.321107e-189
## 11996 0.0000 0.000000 0.000000 4.010869e-194
## 11997 2.9652 1.377556 1.721976 2.968218e-173
## 11998 169.0164 5.135895 13.000631 4.895966e-185
## 11999 106.7472 4.679788 10.331854 5.383963e-186
## 12000 185.3250 5.227492 13.613413 2.605171e-174
## id cor.y exclude.as.feat cor.y.abs
## .pos .pos 0.027497300 FALSE 0.027497300
## CorBG.mad CorBG.mad 0.003604604 FALSE 0.003604604
## CorBG.mean CorBG.mean 0.016157691 FALSE 0.016157691
## CorGR.mad CorGR.mad 0.030253289 FALSE 0.030253289
## CorGR.mean CorGR.mean 0.004925319 FALSE 0.004925319
## CorRB.mean CorRB.mean -0.009617034 FALSE 0.009617034
## CosSmlBG.mad CosSmlBG.mad -0.046206836 FALSE 0.046206836
## CosSmlBG.mean CosSmlBG.mean 0.030023372 FALSE 0.030023372
## CosSmlGR.mad CosSmlGR.mad -0.003587615 FALSE 0.003587615
## CosSmlGR.mean CosSmlGR.mean 0.021022718 FALSE 0.021022718
## CosSmlRB.mad CosSmlRB.mad -0.003587615 FALSE 0.003587615
## CosSmlRB.mean CosSmlRB.mean 0.021022718 FALSE 0.021022718
## lumB.mad.mad lumB.mad.mad 0.038630626 FALSE 0.038630626
## lumB.mad.mean lumB.mad.mean 0.019323904 FALSE 0.019323904
## lumB.mean.mad lumB.mean.mad 0.049622224 FALSE 0.049622224
## lumB.mean.mean lumB.mean.mean -0.032529239 FALSE 0.032529239
## lumG.mad.mean lumG.mad.mean 0.075062422 FALSE 0.075062422
## lumG.mean.mad lumG.mean.mad 0.051616272 FALSE 0.051616272
## lumG.mean.mean lumG.mean.mean 0.052492718 FALSE 0.052492718
## lumR.mad.mad lumR.mad.mad 0.013705157 FALSE 0.013705157
## lumR.mad.mean lumR.mad.mean 0.036065638 FALSE 0.036065638
## lumR.mean.mad lumR.mean.mad 0.015642413 FALSE 0.015642413
## lumR.mean.mean lumR.mean.mean -0.115393376 FALSE 0.115393376
## nImgs nImgs -0.014963676 FALSE 0.014963676
## nImgs.log1p nImgs.log1p 0.047250893 FALSE 0.047250893
## nImgs.nexp nImgs.nexp -0.003435316 FALSE 0.003435316
## nImgs.root2 nImgs.root2 0.014028124 FALSE 0.014028124
## resX.mean.nexp resX.mean.nexp -0.022433472 FALSE 0.022433472
## resXY.mad resXY.mad -0.011946049 FALSE 0.011946049
## resXY.mad.log1p resXY.mad.log1p -0.014055066 FALSE 0.014055066
## resXY.mad.root2 resXY.mad.root2 -0.011364822 FALSE 0.011364822
## resXY.mean resXY.mean -0.009002880 FALSE 0.009002880
## resXY.mean.log1p resXY.mean.log1p -0.004867571 FALSE 0.004867571
## resXY.mean.root2 resXY.mean.root2 -0.007039955 FALSE 0.007039955
## resY.mad resY.mad 0.007630633 FALSE 0.007630633
## resY.mad.log1p resY.mad.log1p -0.001526058 FALSE 0.001526058
## resY.mad.root2 resY.mad.root2 0.002557583 FALSE 0.002557583
## resY.mean.nexp resY.mean.nexp -0.022433472 FALSE 0.022433472
## cor.high.X freqRatio percentUnique zeroVar
## .pos <NA> 1.000000 100.00 FALSE
## CorBG.mad <NA> 1.000000 100.00 FALSE
## CorBG.mean CosSmlBG.mean 1.000000 99.90 FALSE
## CorGR.mad <NA> 1.000000 100.00 FALSE
## CorGR.mean <NA> 2.000000 99.95 FALSE
## CorRB.mean CorRB.mad 1.000000 100.00 FALSE
## CosSmlBG.mad <NA> 1.000000 100.00 FALSE
## CosSmlBG.mean CosSmlBG.mad 1.000000 99.70 FALSE
## CosSmlGR.mad <NA> 1.000000 100.00 FALSE
## CosSmlGR.mean CosSmlBG.mean 1.000000 99.70 FALSE
## CosSmlRB.mad <NA> 1.000000 100.00 FALSE
## CosSmlRB.mean CosSmlGR.mean 1.000000 99.70 FALSE
## lumB.mad.mad <NA> 1.022624 2.75 FALSE
## lumB.mad.mean <NA> 1.200000 92.35 FALSE
## lumB.mean.mad lumG.mean.mad 1.000000 100.00 FALSE
## lumB.mean.mean lumG.mean.mean 2.000000 99.95 FALSE
## lumG.mad.mean lumG.mad.mean.cut.fctr 1.000000 96.00 FALSE
## lumG.mean.mad <NA> 2.000000 99.95 FALSE
## lumG.mean.mean <NA> 2.000000 99.95 FALSE
## lumR.mad.mad <NA> 1.020576 2.30 FALSE
## lumR.mad.mean <NA> 1.142857 93.95 FALSE
## lumR.mean.mad <NA> 2.000000 99.95 FALSE
## lumR.mean.mean <NA> 1.000000 100.00 FALSE
## nImgs <NA> 1.033333 19.10 FALSE
## nImgs.log1p nImgs.cut.fctr 1.033333 19.10 FALSE
## nImgs.nexp <NA> 1.193548 17.35 FALSE
## nImgs.root2 nImgs.log1p 1.033333 19.10 FALSE
## resX.mean.nexp <NA> 2.000000 97.75 FALSE
## resXY.mad <NA> 9.568047 4.35 FALSE
## resXY.mad.log1p resXY.mad.nexp 9.568047 4.35 FALSE
## resXY.mad.root2 resXY.mad 9.568047 4.35 FALSE
## resXY.mean <NA> 6.000000 98.55 FALSE
## resXY.mean.log1p <NA> 4.000000 90.80 FALSE
## resXY.mean.root2 <NA> 6.000000 98.20 FALSE
## resY.mad <NA> 5.354497 9.05 FALSE
## resY.mad.log1p <NA> 5.354497 9.05 FALSE
## resY.mad.root2 <NA> 5.354497 9.05 FALSE
## resY.mean.nexp resX.mean.nexp 1.666667 98.15 FALSE
## nzv is.cor.y.abs.low interaction.feat
## .pos FALSE FALSE NA
## CorBG.mad FALSE TRUE NA
## CorBG.mean FALSE FALSE NA
## CorGR.mad FALSE FALSE NA
## CorGR.mean FALSE TRUE NA
## CorRB.mean FALSE FALSE NA
## CosSmlBG.mad FALSE FALSE NA
## CosSmlBG.mean FALSE FALSE NA
## CosSmlGR.mad FALSE TRUE NA
## CosSmlGR.mean FALSE FALSE NA
## CosSmlRB.mad FALSE TRUE NA
## CosSmlRB.mean FALSE FALSE NA
## lumB.mad.mad FALSE FALSE NA
## lumB.mad.mean FALSE FALSE NA
## lumB.mean.mad FALSE FALSE NA
## lumB.mean.mean FALSE FALSE NA
## lumG.mad.mean FALSE FALSE NA
## lumG.mean.mad FALSE FALSE NA
## lumG.mean.mean FALSE FALSE NA
## lumR.mad.mad FALSE FALSE NA
## lumR.mad.mean FALSE FALSE NA
## lumR.mean.mad FALSE FALSE NA
## lumR.mean.mean FALSE FALSE NA
## nImgs FALSE FALSE NA
## nImgs.log1p FALSE FALSE NA
## nImgs.nexp FALSE TRUE NA
## nImgs.root2 FALSE FALSE NA
## resX.mean.nexp FALSE FALSE NA
## resXY.mad FALSE FALSE NA
## resXY.mad.log1p FALSE FALSE NA
## resXY.mad.root2 FALSE FALSE NA
## resXY.mean FALSE FALSE NA
## resXY.mean.log1p FALSE TRUE NA
## resXY.mean.root2 FALSE TRUE NA
## resY.mad FALSE TRUE NA
## resY.mad.log1p FALSE TRUE NA
## resY.mad.root2 FALSE TRUE NA
## resY.mean.nexp FALSE FALSE NA
## shapiro.test.p.value rsp_var_raw id_var rsp_var
## .pos 2.145811e-24 FALSE NA NA
## CorBG.mad 1.066454e-34 FALSE NA NA
## CorBG.mean 8.214498e-35 FALSE NA NA
## CorGR.mad 7.392587e-28 FALSE NA NA
## CorGR.mean 1.178044e-23 FALSE NA NA
## CorRB.mean 1.946414e-36 FALSE NA NA
## CosSmlBG.mad 7.694894e-38 FALSE NA NA
## CosSmlBG.mean 1.504448e-37 FALSE NA NA
## CosSmlGR.mad 1.304267e-33 FALSE NA NA
## CosSmlGR.mean 9.960727e-29 FALSE NA NA
## CosSmlRB.mad 1.304267e-33 FALSE NA NA
## CosSmlRB.mean 9.960727e-29 FALSE NA NA
## lumB.mad.mad 5.263514e-19 FALSE NA NA
## lumB.mad.mean 2.584492e-12 FALSE NA NA
## lumB.mean.mad 1.492315e-16 FALSE NA NA
## lumB.mean.mean 2.980872e-09 FALSE NA NA
## lumG.mad.mean 1.165498e-07 FALSE NA NA
## lumG.mean.mad 6.299586e-18 FALSE NA NA
## lumG.mean.mean 1.000052e-02 FALSE NA NA
## lumR.mad.mad 1.539005e-12 FALSE NA NA
## lumR.mad.mean 5.440725e-06 FALSE NA NA
## lumR.mean.mad 3.610555e-14 FALSE NA NA
## lumR.mean.mean 4.578209e-07 FALSE NA NA
## nImgs 1.364097e-61 FALSE NA NA
## nImgs.log1p 1.234907e-13 FALSE NA NA
## nImgs.nexp 1.763177e-72 FALSE NA NA
## nImgs.root2 4.118632e-46 FALSE NA NA
## resX.mean.nexp 1.194234e-72 FALSE NA NA
## resXY.mad 9.894151e-67 FALSE NA NA
## resXY.mad.log1p 3.868763e-59 FALSE NA NA
## resXY.mad.root2 1.509232e-62 FALSE NA NA
## resXY.mean 2.964553e-36 FALSE NA NA
## resXY.mean.log1p 6.980019e-43 FALSE NA NA
## resXY.mean.root2 1.780045e-39 FALSE NA NA
## resY.mad 3.711302e-48 FALSE NA NA
## resY.mad.log1p 3.133148e-49 FALSE NA NA
## resY.mad.root2 1.717662e-47 FALSE NA NA
## resY.mean.nexp 1.194234e-72 FALSE NA NA
## max min max.outdoor.fctr.N
## .pos 1.200000e+04 1.000000e+00 2.000000e+03
## CorBG.mad 2.243888e-01 0.000000e+00 1.255981e-01
## CorBG.mean 9.873230e-01 3.973767e-01 9.754135e-01
## CorGR.mad 4.591960e-01 0.000000e+00 2.865703e-01
## CorGR.mean 9.563650e-01 9.720700e-02 9.143933e-01
## CorRB.mean 9.814793e-01 6.664379e-01 9.720959e-01
## CosSmlBG.mad 8.290404e-02 0.000000e+00 5.922465e-02
## CosSmlBG.mean 9.952656e-01 7.560239e-01 9.919500e-01
## CosSmlGR.mad 1.971821e-01 0.000000e+00 1.262114e-01
## CosSmlGR.mean 9.832886e-01 7.321306e-01 9.748654e-01
## CosSmlRB.mad 1.971821e-01 0.000000e+00 1.262114e-01
## CosSmlRB.mean 9.832886e-01 7.321306e-01 9.748654e-01
## lumB.mad.mad 2.672203e-01 0.000000e+00 2.672203e-01
## lumB.mad.mean 3.621365e-01 1.046541e-01 3.621365e-01
## lumB.mean.mad 3.437349e-01 0.000000e+00 2.318285e-01
## lumB.mean.mean 6.404673e-01 1.977313e-01 5.616800e-01
## lumG.mad.mean 3.604753e-01 7.599882e-02 3.496776e-01
## lumG.mean.mad 3.773683e-01 0.000000e+00 2.696606e-01
## lumG.mean.mean 5.993138e-01 1.422352e-01 5.253529e-01
## lumR.mad.mad 2.930804e-01 0.000000e+00 1.939502e-01
## lumR.mad.mean 3.837318e-01 1.104682e-01 3.662894e-01
## lumR.mean.mad 3.627523e-01 0.000000e+00 2.805100e-01
## lumR.mean.mean 6.986750e-01 2.684265e-01 6.691801e-01
## nImgs 2.974000e+03 1.000000e+00 2.974000e+03
## nImgs.log1p 7.997999e+00 6.931472e-01 7.997999e+00
## nImgs.nexp 3.678794e-01 0.000000e+00 1.353353e-01
## nImgs.root2 5.453439e+01 1.000000e+00 5.453439e+01
## resX.mean.nexp 5.762208e-124 7.124576e-218 5.762208e-124
## resXY.mad 1.237971e+05 0.000000e+00 1.237971e+05
## resXY.mad.log1p 1.172641e+01 0.000000e+00 1.172641e+01
## resXY.mad.root2 3.518481e+02 0.000000e+00 3.518481e+02
## resXY.mean 2.500000e+05 8.762615e+04 2.175740e+05
## resXY.mean.log1p 1.242922e+01 1.138085e+01 1.229030e+01
## resXY.mean.root2 5.000000e+02 2.960172e+02 4.664483e+02
## resY.mad 2.787288e+02 0.000000e+00 1.942206e+02
## resY.mad.log1p 5.633821e+00 0.000000e+00 5.274130e+00
## resY.mad.root2 1.669517e+01 0.000000e+00 1.393631e+01
## resY.mean.nexp 1.328912e-110 7.124576e-218 1.328912e-110
## max.outdoor.fctr.Y min.outdoor.fctr.N min.outdoor.fctr.Y
## .pos 1.997000e+03 2.000000e+00 1.000000e+00
## CorBG.mad 1.740408e-01 7.895506e-03 1.192589e-03
## CorBG.mean 9.763031e-01 6.990869e-01 7.976347e-01
## CorGR.mad 2.998611e-01 2.569433e-03 3.041371e-02
## CorGR.mean 9.168114e-01 5.526847e-01 6.038797e-01
## CorRB.mean 9.727625e-01 7.476792e-01 6.664379e-01
## CosSmlBG.mad 7.349383e-02 3.181644e-03 2.835269e-03
## CosSmlBG.mean 9.919660e-01 8.793939e-01 8.990240e-01
## CosSmlGR.mad 1.410021e-01 5.660636e-03 9.706923e-03
## CosSmlGR.mean 9.774375e-01 8.416038e-01 7.730400e-01
## CosSmlRB.mad 1.410021e-01 5.660636e-03 9.706923e-03
## CosSmlRB.mean 9.774375e-01 8.416038e-01 7.730400e-01
## lumB.mad.mad 2.068803e-01 8.620011e-03 2.586003e-02
## lumB.mad.mean 3.130679e-01 1.094111e-01 1.217545e-01
## lumB.mean.mad 3.251330e-01 2.497502e-02 2.948638e-02
## lumB.mean.mean 5.887858e-01 2.374392e-01 2.302221e-01
## lumG.mad.mean 3.357184e-01 1.146528e-01 7.599882e-02
## lumG.mean.mad 3.267359e-01 1.148923e-02 2.485611e-02
## lumG.mean.mean 5.311267e-01 1.817476e-01 1.453933e-01
## lumR.mad.mad 2.068803e-01 2.586003e-02 1.293002e-02
## lumR.mad.mean 3.166458e-01 1.437851e-01 1.411135e-01
## lumR.mean.mad 2.855381e-01 3.823550e-03 2.119836e-02
## lumR.mean.mean 6.617843e-01 2.979887e-01 2.699415e-01
## nImgs 1.954000e+03 2.000000e+00 2.000000e+00
## nImgs.log1p 7.578145e+00 1.098612e+00 1.098612e+00
## nImgs.nexp 1.353353e-01 0.000000e+00 0.000000e+00
## nImgs.root2 4.420407e+01 1.414214e+00 1.414214e+00
## resX.mean.nexp 1.209672e-151 7.124576e-218 7.124576e-218
## resXY.mad 1.078962e+05 0.000000e+00 0.000000e+00
## resXY.mad.log1p 1.158893e+01 0.000000e+00 0.000000e+00
## resXY.mad.root2 3.284756e+02 0.000000e+00 0.000000e+00
## resXY.mean 2.182778e+05 8.762615e+04 1.137250e+05
## resXY.mean.log1p 1.229353e+01 1.138085e+01 1.164155e+01
## resXY.mean.root2 4.672021e+02 2.960172e+02 3.372314e+02
## resY.mad 1.853250e+02 0.000000e+00 0.000000e+00
## resY.mad.log1p 5.227492e+00 0.000000e+00 0.000000e+00
## resY.mad.root2 1.361341e+01 0.000000e+00 0.000000e+00
## resY.mean.nexp 2.530221e-127 7.667025e-212 7.124576e-218
## max.outdoor.fctr.All.X..rcv.glmnet.N
## .pos 1.997000e+03
## CorBG.mad 9.640539e-02
## CorBG.mean 9.638528e-01
## CorGR.mad 2.206953e-01
## CorGR.mean 8.924409e-01
## CorRB.mean 9.572325e-01
## CosSmlBG.mad 7.349383e-02
## CosSmlBG.mean 9.900264e-01
## CosSmlGR.mad 1.410021e-01
## CosSmlGR.mean 9.734692e-01
## CosSmlRB.mad 1.410021e-01
## CosSmlRB.mean 9.734692e-01
## lumB.mad.mad 1.896402e-01
## lumB.mad.mean 3.021552e-01
## lumB.mean.mad 2.000383e-01
## lumB.mean.mean 5.140038e-01
## lumG.mad.mean 2.750078e-01
## lumG.mean.mad 1.858523e-01
## lumG.mean.mean 4.361654e-01
## lumR.mad.mad 1.939502e-01
## lumR.mad.mean 2.889616e-01
## lumR.mean.mad 2.472881e-01
## lumR.mean.mean 6.527598e-01
## nImgs 2.020000e+02
## nImgs.log1p 5.313206e+00
## nImgs.nexp 3.354626e-04
## nImgs.root2 1.421267e+01
## resX.mean.nexp 2.297245e-167
## resXY.mad 6.968220e+04
## resXY.mad.log1p 1.115171e+01
## resXY.mad.root2 2.639739e+02
## resXY.mean 2.010079e+05
## resXY.mean.log1p 1.221110e+01
## resXY.mean.root2 4.483391e+02
## resY.mad 1.942206e+02
## resY.mad.log1p 5.274130e+00
## resY.mad.root2 1.393631e+01
## resY.mean.nexp 1.802525e-146
## max.outdoor.fctr.All.X..rcv.glmnet.Y
## .pos 1.998000e+03
## CorBG.mad 1.135332e-01
## CorBG.mean 9.763031e-01
## CorGR.mad 2.998611e-01
## CorGR.mean 9.159928e-01
## CorRB.mean 9.727625e-01
## CosSmlBG.mad 5.448611e-02
## CosSmlBG.mean 9.919660e-01
## CosSmlGR.mad 1.318410e-01
## CosSmlGR.mean 9.774375e-01
## CosSmlRB.mad 1.318410e-01
## CosSmlRB.mean 9.774375e-01
## lumB.mad.mad 2.068803e-01
## lumB.mad.mean 3.338965e-01
## lumB.mean.mad 2.318285e-01
## lumB.mean.mean 5.464919e-01
## lumG.mad.mean 3.330659e-01
## lumG.mean.mad 2.446775e-01
## lumG.mean.mean 5.108060e-01
## lumR.mad.mad 1.896402e-01
## lumR.mad.mean 3.662894e-01
## lumR.mean.mad 2.396022e-01
## lumR.mean.mean 6.691801e-01
## nImgs 2.239000e+03
## nImgs.log1p 7.714231e+00
## nImgs.nexp 1.353353e-01
## nImgs.root2 4.731807e+01
## resX.mean.nexp 1.046231e-160
## resXY.mad 7.931910e+04
## resXY.mad.log1p 1.128125e+01
## resXY.mad.root2 2.816365e+02
## resXY.mean 2.182778e+05
## resXY.mean.log1p 1.229353e+01
## resXY.mean.root2 4.672021e+02
## resY.mad 1.853250e+02
## resY.mad.log1p 5.227492e+00
## resY.mad.root2 1.361341e+01
## resY.mean.nexp 5.494851e-136
## min.outdoor.fctr.All.X..rcv.glmnet.N
## .pos 5.000000e+00
## CorBG.mad 1.744157e-02
## CorBG.mean 8.254616e-01
## CorGR.mad 3.270109e-02
## CorGR.mean 6.682450e-01
## CorRB.mean 8.443430e-01
## CosSmlBG.mad 5.967801e-03
## CosSmlBG.mean 9.096928e-01
## CosSmlGR.mad 1.543990e-02
## CosSmlGR.mean 7.730400e-01
## CosSmlRB.mad 1.543990e-02
## CosSmlRB.mean 7.730400e-01
## lumB.mad.mad 2.586003e-02
## lumB.mad.mean 1.220965e-01
## lumB.mean.mad 5.342117e-02
## lumB.mean.mean 2.351705e-01
## lumG.mad.mean 7.599882e-02
## lumG.mean.mad 3.819614e-02
## lumG.mean.mean 1.453933e-01
## lumR.mad.mad 4.310005e-02
## lumR.mad.mean 1.437851e-01
## lumR.mean.mad 3.556016e-02
## lumR.mean.mean 4.160745e-01
## nImgs 8.000000e+00
## nImgs.log1p 2.197225e+00
## nImgs.nexp 1.872900e-88
## nImgs.root2 2.828427e+00
## resX.mean.nexp 2.657429e-204
## resXY.mad 0.000000e+00
## resXY.mad.log1p 0.000000e+00
## resXY.mad.root2 0.000000e+00
## resXY.mean 1.445588e+05
## resXY.mean.log1p 1.188145e+01
## resXY.mean.root2 3.802089e+02
## resY.mad 0.000000e+00
## resY.mad.log1p 0.000000e+00
## resY.mad.root2 0.000000e+00
## resY.mean.nexp 2.192657e-207
## min.outdoor.fctr.All.X..rcv.glmnet.Y
## .pos 1.000000e+00
## CorBG.mad 8.948235e-03
## CorBG.mean 6.990869e-01
## CorGR.mad 2.569433e-03
## CorGR.mean 5.722977e-01
## CorRB.mean 6.664379e-01
## CosSmlBG.mad 2.835269e-03
## CosSmlBG.mean 8.793939e-01
## CosSmlGR.mad 5.660636e-03
## CosSmlGR.mean 8.441096e-01
## CosSmlRB.mad 5.660636e-03
## CosSmlRB.mean 8.441096e-01
## lumB.mad.mad 2.586003e-02
## lumB.mad.mean 1.367933e-01
## lumB.mean.mad 2.497502e-02
## lumB.mean.mean 2.374392e-01
## lumG.mad.mean 1.172004e-01
## lumG.mean.mad 1.148923e-02
## lumG.mean.mean 2.011802e-01
## lumR.mad.mad 2.586003e-02
## lumR.mad.mean 1.473628e-01
## lumR.mean.mad 3.823550e-03
## lumR.mean.mean 2.979887e-01
## nImgs 2.000000e+00
## nImgs.log1p 1.098612e+00
## nImgs.nexp 0.000000e+00
## nImgs.root2 1.414214e+00
## resX.mean.nexp 7.124576e-218
## resXY.mad 0.000000e+00
## resXY.mad.log1p 0.000000e+00
## resXY.mad.root2 0.000000e+00
## resXY.mean 1.453448e+05
## resXY.mean.log1p 1.188687e+01
## resXY.mean.root2 3.812411e+02
## resY.mad 0.000000e+00
## resY.mad.log1p 0.000000e+00
## resY.mad.root2 0.000000e+00
## resY.mean.nexp 7.667025e-212
## max.outdoor.fctr.Final..rcv.glmnet.Y
## .pos 1.200000e+04
## CorBG.mad 2.243888e-01
## CorBG.mean 9.873230e-01
## CorGR.mad 4.591960e-01
## CorGR.mean 9.563650e-01
## CorRB.mean 9.814793e-01
## CosSmlBG.mad 8.290404e-02
## CosSmlBG.mean 9.952656e-01
## CosSmlGR.mad 1.971821e-01
## CosSmlGR.mean 9.832886e-01
## CosSmlRB.mad 1.971821e-01
## CosSmlRB.mean 9.832886e-01
## lumB.mad.mad 2.629103e-01
## lumB.mad.mean 3.461636e-01
## lumB.mean.mad 3.437349e-01
## lumB.mean.mean 6.404673e-01
## lumG.mad.mean 3.604753e-01
## lumG.mean.mad 3.773683e-01
## lumG.mean.mean 5.993138e-01
## lumR.mad.mad 2.930804e-01
## lumR.mad.mean 3.837318e-01
## lumR.mean.mad 3.627523e-01
## lumR.mean.mean 6.986750e-01
## nImgs 2.825000e+03
## nImgs.log1p 7.946618e+00
## nImgs.nexp 3.678794e-01
## nImgs.root2 5.315073e+01
## resX.mean.nexp 5.719134e-133
## resXY.mad 1.086375e+05
## resXY.mad.log1p 1.159578e+01
## resXY.mad.root2 3.296021e+02
## resXY.mean 2.500000e+05
## resXY.mean.log1p 1.242922e+01
## resXY.mean.root2 5.000000e+02
## resY.mad 2.787288e+02
## resY.mad.log1p 5.633821e+00
## resY.mad.root2 1.669517e+01
## resY.mean.nexp 1.105028e-116
## min.outdoor.fctr.Final..rcv.glmnet.Y
## .pos 2.001000e+03
## CorBG.mad 0.000000e+00
## CorBG.mean 3.973767e-01
## CorGR.mad 0.000000e+00
## CorGR.mean 9.720700e-02
## CorRB.mean 6.930769e-01
## CosSmlBG.mad 0.000000e+00
## CosSmlBG.mean 7.560239e-01
## CosSmlGR.mad 0.000000e+00
## CosSmlGR.mean 7.321306e-01
## CosSmlRB.mad 0.000000e+00
## CosSmlRB.mean 7.321306e-01
## lumB.mad.mad 0.000000e+00
## lumB.mad.mean 1.046541e-01
## lumB.mean.mad 0.000000e+00
## lumB.mean.mean 1.977313e-01
## lumG.mad.mean 9.128165e-02
## lumG.mean.mad 0.000000e+00
## lumG.mean.mean 1.422352e-01
## lumR.mad.mad 0.000000e+00
## lumR.mad.mean 1.104682e-01
## lumR.mean.mad 0.000000e+00
## lumR.mean.mean 2.684265e-01
## nImgs 1.000000e+00
## nImgs.log1p 6.931472e-01
## nImgs.nexp 0.000000e+00
## nImgs.root2 1.000000e+00
## resX.mean.nexp 7.124576e-218
## resXY.mad 0.000000e+00
## resXY.mad.log1p 0.000000e+00
## resXY.mad.root2 0.000000e+00
## resXY.mean 1.058460e+05
## resXY.mean.log1p 1.156975e+01
## resXY.mean.root2 3.253398e+02
## resY.mad 0.000000e+00
## resY.mad.log1p 0.000000e+00
## resY.mad.root2 0.000000e+00
## resY.mean.nexp 7.124576e-218
## [1] "newobs total range outliers: 10000"
## [1] TRUE
## [1] "ObsNew output class tables:"
## lunch.-1 dinner.-1 reserve.2 outdoor.3 expensive.-1
## 10000 10000 10000 10000 10000
## liquor.5 table.6 classy.-1 kids.8
## 10000 10000 10000 10000
## [1] 0.2
## [1] "glb_sel_mdl_id: All.X##rcv#glmnet"
## [1] "glb_fin_mdl_id: Final##rcv#glmnet"
## [1] "Cross Validation issues:"
## MFO###myMFO_classfr Random###myrandom_classfr
## 0 0
## Max.cor.Y.rcv.1X1###glmnet
## 0
## max.Accuracy.OOB max.AUCROCR.OOB
## Interact.High.cor.Y##rcv#glmnet 0.5356068 0.6175050
## Low.cor.X##rcv#glmnet 0.5285858 0.5990704
## All.X##rcv#glmnet 0.5185557 0.6040644
## Max.cor.Y.rcv.1X1###glmnet 0.5035105 0.6006841
## All.X##rcv#glm 0.5015045 0.6019115
## Max.cor.Y##rcv#rpart 0.5015045 0.5152716
## MFO###myMFO_classfr 0.5015045 0.5000000
## Random###myrandom_classfr 0.5015045 0.4974668
## Final##rcv#glmnet NA NA
## max.AUCpROC.OOB max.Accuracy.fit
## Interact.High.cor.Y##rcv#glmnet 0.5736479 0.5998532
## Low.cor.X##rcv#glmnet 0.5737143 0.5955435
## All.X##rcv#glmnet 0.5886781 0.6001859
## Max.cor.Y.rcv.1X1###glmnet 0.5736479 0.5144566
## All.X##rcv#glm 0.5826358 0.5875394
## Max.cor.Y##rcv#rpart 0.5152716 0.5569916
## MFO###myMFO_classfr 0.5000000 0.5014955
## Random###myrandom_classfr 0.5084970 0.5014955
## Final##rcv#glmnet NA 0.5921722
## opt.prob.threshold.fit
## Interact.High.cor.Y##rcv#glmnet 0.4
## Low.cor.X##rcv#glmnet 0.4
## All.X##rcv#glmnet 0.4
## Max.cor.Y.rcv.1X1###glmnet 0.3
## All.X##rcv#glm 0.4
## Max.cor.Y##rcv#rpart 0.5
## MFO###myMFO_classfr 0.4
## Random###myrandom_classfr 0.4
## Final##rcv#glmnet 0.4
## opt.prob.threshold.OOB
## Interact.High.cor.Y##rcv#glmnet 0.3
## Low.cor.X##rcv#glmnet 0.4
## All.X##rcv#glmnet 0.2
## Max.cor.Y.rcv.1X1###glmnet 0.2
## All.X##rcv#glm 0.0
## Max.cor.Y##rcv#rpart 0.2
## MFO###myMFO_classfr 0.4
## Random###myrandom_classfr 0.4
## Final##rcv#glmnet NA
## [1] "All.X##rcv#glmnet OOB confusion matrix & accuracy: "
## Prediction
## Reference N Y
## N 26 471
## Y 9 491
## err.abs.fit.sum err.abs.OOB.sum err.abs.trn.sum
## (0.22,0.23] 52.70722 55.29970 112.3045
## (0.21,0.22] 46.81490 51.65088 105.2764
## (0.23,0.37] 184.14207 192.79373 397.1284
## (0.07,0.21] 163.40703 170.30356 355.3907
## err.abs.new.sum .freqRatio.Fit .freqRatio.OOB .freqRatio.Tst
## (0.22,0.23] NA 0.1156530 0.1143430 0.2192
## (0.21,0.22] NA 0.1076770 0.1073220 0.2591
## (0.23,0.37] NA 0.4107677 0.4122367 0.2416
## (0.07,0.21] NA 0.3659023 0.3660983 0.2801
## .n.Fit .n.New.Y .n.OOB .n.Trn.N .n.Trn.Y .n.Tst .n.fit .n.new
## (0.22,0.23] 116 2192 114 119 111 2192 116 2192
## (0.21,0.22] 108 2591 107 108 107 2591 108 2591
## (0.23,0.37] 412 2416 411 367 456 2416 412 2416
## (0.07,0.21] 367 2801 365 403 329 2801 367 2801
## .n.trn err.abs.OOB.mean err.abs.fit.mean err.abs.new.mean
## (0.22,0.23] 230 0.4850851 0.4543726 NA
## (0.21,0.22] 215 0.4827185 0.4334713 NA
## (0.23,0.37] 823 0.4690845 0.4469468 NA
## (0.07,0.21] 732 0.4665851 0.4452508 NA
## err.abs.trn.mean
## (0.22,0.23] 0.4882805
## (0.21,0.22] 0.4896575
## (0.23,0.37] 0.4825376
## (0.07,0.21] 0.4855065
## err.abs.fit.sum err.abs.OOB.sum err.abs.trn.sum err.abs.new.sum
## 447.071216 470.047879 970.100024 NA
## .freqRatio.Fit .freqRatio.OOB .freqRatio.Tst .n.Fit
## 1.000000 1.000000 1.000000 1003.000000
## .n.New.Y .n.OOB .n.Trn.N .n.Trn.Y
## 10000.000000 997.000000 997.000000 1003.000000
## .n.Tst .n.fit .n.new .n.trn
## 10000.000000 1003.000000 10000.000000 2000.000000
## err.abs.OOB.mean err.abs.fit.mean err.abs.new.mean err.abs.trn.mean
## 1.903473 1.780041 NA 1.945982
## [1] "Features Importance for selected models:"
## All.X..rcv.glmnet.imp Final..rcv.glmnet.imp
## resX.mean.nexp 1.000000e+02 0.00000
## resY.mean.nexp 1.000000e+02 0.00000
## lumG.mad.mean 1.481575e-33 44.66925
## lumR.mean.mean 1.185983e-33 100.00000
## lumG.mean.mad 8.658961e-34 20.01553
## lumG.mad.mad 7.009418e-34 12.23265
## CorRB.mad 1.970257e-34 39.98867
## lumG.mean.mean 1.950337e-34 32.21896
## [1] "glbObsNew prediction stats:"
##
## N Y
## 0 10000
## label step_major step_minor label_minor bgn end
## 22 predict.data.new 10 0 0 337.218 355.004
## 23 display.session.info 11 0 0 355.004 NA
## elapsed
## 22 17.786
## 23 NA
Null hypothesis (\(H_{0}\)): mpg is not affected by am_fctr.
The variances across the am_fctr groups appear to be unequal, so Welch's two-sample t-test (`var.equal = FALSE`) is appropriate.

```{r q1, cache=FALSE}
# print(t.test(subset(cars_df, am_fctr == "automatic")$mpg,
#              subset(cars_df, am_fctr == "manual")$mpg,
#              var.equal = FALSE)$conf)
# We reject the null hypothesis, i.e., we have evidence to conclude that
# am_fctr impacts mpg (95% confidence). Manual transmission is better for
# miles per gallon than automatic transmission.
```
## label step_major step_minor label_minor bgn
## 1 import.data 1 0 0 9.012
## 16 fit.models 8 0 0 228.228
## 2 inspect.data 2 0 0 163.962
## 17 fit.models 8 1 1 272.517
## 22 predict.data.new 10 0 0 337.218
## 20 fit.data.training 9 0 0 316.077
## 3 scrub.data 2 1 1 207.934
## 18 fit.models 8 2 2 300.743
## 21 fit.data.training 9 1 1 330.427
## 19 fit.models 8 3 3 311.751
## 15 select.features 7 0 0 224.932
## 14 partition.data.training 6 0 0 222.815
## 11 extract.features.end 3 6 6 221.262
## 12 manage.missing.data 4 0 0 222.240
## 13 cluster.data 5 0 0 222.727
## 4 transform.data 2 2 2 220.922
## 10 extract.features.string 3 5 5 221.204
## 7 extract.features.image 3 2 2 221.055
## 9 extract.features.text 3 4 4 221.149
## 8 extract.features.price 3 3 3 221.112
## 6 extract.features.datetime 3 1 1 221.018
## 5 extract.features 3 0 0 220.996
## end elapsed duration
## 1 163.961 154.949 154.949
## 16 272.516 44.288 44.288
## 2 207.933 43.971 43.971
## 17 300.742 28.226 28.225
## 22 355.004 17.786 17.786
## 20 330.427 14.350 14.350
## 3 220.921 12.988 12.987
## 18 311.751 11.008 11.008
## 21 337.217 6.791 6.790
## 19 316.077 4.326 4.326
## 15 228.228 3.296 3.296
## 14 224.932 2.117 2.117
## 11 222.239 0.977 0.977
## 12 222.726 0.487 0.486
## 13 222.814 0.088 0.087
## 4 220.995 0.074 0.073
## 10 221.261 0.057 0.057
## 7 221.111 0.056 0.056
## 9 221.203 0.054 0.054
## 8 221.149 0.037 0.037
## 6 221.054 0.037 0.036
## 5 221.017 0.022 0.021
## [1] "Total Elapsed Time: 355.004 secs"